summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
0 files changed, 0 insertions, 0 deletions
ion> Russell King's ARM Linux kernel treeRussell King
summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/Kconfig444
-rw-r--r--drivers/gpu/drm/Kconfig.debug117
-rw-r--r--drivers/gpu/drm/Makefile232
-rw-r--r--drivers/gpu/drm/adp/Kconfig17
-rw-r--r--drivers/gpu/drm/adp/Makefile5
-rw-r--r--drivers/gpu/drm/adp/adp-mipi.c277
-rw-r--r--drivers/gpu/drm/adp/adp_drv.c614
-rw-r--r--drivers/gpu/drm/amd/acp/include/acp_gfx_if.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile160
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ObjectID.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c473
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h985
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c984
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h232
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c463
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c928
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c576
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h360
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c197
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c162
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c559
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c598
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c404
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c835
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c384
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c243
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c167
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c792
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h117
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c2745
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c342
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c843
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c279
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c134
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c201
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c330
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c591
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c1615
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c549
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c1191
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c371
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c5850
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_df.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c3077
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c840
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c318
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h113
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c244
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c2186
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c240
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c241
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c390
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c124
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c573
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c449
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c360
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c781
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c2163
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h391
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c1350
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h234
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c328
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c293
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c185
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c326
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c95
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c96
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c308
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c345
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c411
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c439
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c952
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c630
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h169
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c784
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h415
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c157
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h150
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c87
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c945
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c3971
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h464
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c392
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c4799
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h531
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c2097
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h194
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c354
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h176
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c576
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h341
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c576
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c304
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h184
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c325
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c529
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c262
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c263
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c249
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h123
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c2204
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c1214
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h278
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c639
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h146
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c550
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c1482
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h161
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c1011
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c379
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c449
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c1377
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h363
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c1348
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h216
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c659
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c2726
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h387
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c976
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c1018
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c798
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c1107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h217
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c1168
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h380
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c986
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.c139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c288
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.h39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_crtc.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_encoders.c109
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_i2c.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c233
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h997
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_si.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c174
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c3778
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c516
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c233
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_virtual.c769
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_virtual.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c67
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_15.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_15.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_3.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_3.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h31
-rw-r--r--[-rwxr-xr-x]drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c0
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c3512
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm124
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c7538
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c110
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c5793
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c282
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c460
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c561
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c2504
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c66
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c1938
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c5062
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c516
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c521
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c128
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c674
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c197
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c513
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c501
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c666
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c1085
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c1070
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c256
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c237
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c322
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c1458
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c206
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c817
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.c796
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.c787
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c393
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_0.c406
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c191
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c377
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c195
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c454
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c181
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c878
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c1486
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c872
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c733
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c1101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mca_v3_0.c104
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mca_v3_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_api_def.h443
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c501
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.c1007
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c1760
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c1942
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c1371
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c871
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c188
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c664
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c597
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c570
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c746
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c647
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c94
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c178
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c294
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c169
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c554
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c634
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c154
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c400
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c194
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c367
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c369
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c696
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c960
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nvd.h211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h133
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c431
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c186
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c234
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c977
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c355
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v14_0.c705
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v14_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c97
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c146
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c159
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c857
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c274
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c2613
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c949
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c1097
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c1917
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h5672
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c1859
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c753
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c204
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_enums.h246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sid.h1964
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c298
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c396
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c296
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c161
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c109
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c925
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h172
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h156
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c1021
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.c601
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.c742
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.h105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c528
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.h77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.c458
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.h75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.c160
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_7.c187
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_7.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c434
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c134
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c142
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c410
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v1_0.c839
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v1_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c104
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c391
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c566
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c485
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c1532
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c1279
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c2315
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c2131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c1723
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c1442
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c1729
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c260
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c514
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h218
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c398
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig30
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile21
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c36
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h4461
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm845
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm1136
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm527
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2628
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c1690
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h55
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c845
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h230
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c158
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h293
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c1152
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h142
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c90
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c1950
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c2592
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h167
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c153
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c57
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c90
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c90
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c72
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c207
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c242
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c778
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c94
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c386
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c418
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c502
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c106
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c357
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.h78
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c78
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c1079
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.h55
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c245
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h95
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c187
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c229
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c569
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c459
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c761
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c218
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c256
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c279
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c42
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pasid.c69
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h82
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h94
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h290
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h829
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c1536
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c783
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_queue.c388
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c265
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h37
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c4279
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h275
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c1461
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h99
-rw-r--r--drivers/gpu/drm/amd/amdkfd/soc15_int.h6
-rw-r--r--drivers/gpu/drm/amd/amdxcp/Makefile25
-rw-r--r--drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c152
-rw-r--r--drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h30
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig38
-rw-r--r--drivers/gpu/drm/amd/display/Makefile20
-rw-r--r--drivers/gpu/drm/amd/display/TODO110
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/Makefile24
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c10582
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h669
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c1813
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c209
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.h36
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c659
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h85
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c804
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h51
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c2405
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h4
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c542
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h18
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c1091
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c359
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h13
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c1576
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h61
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c1950
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h68
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c291
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c277
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h43
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c178
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c209
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h49
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c9
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h158
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c215
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h36
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c112
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/Makefile58
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/Makefile9
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/bw_fixed.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c)27
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/calcs_logger.h (renamed from drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.c58
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/custom_float.c173
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dc_common.c23
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dc_common.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c)1220
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/vector.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser.c205
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c1689
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_types_internal2.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table.c322
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table2.c192
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table2.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c106
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c106
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/Makefile68
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/custom_float.c197
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile95
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c318
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c79
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c34
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c170
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c217
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c347
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c71
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c207
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c29
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c118
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c66
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c283
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c809
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h58
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c359
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.h271
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c1051
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c398
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h110
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c820
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c366
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h130
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c680
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.h49
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c344
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.h139
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h74
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c1233
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c308
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c141
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c1588
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c508
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h220
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c1631
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h117
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c472
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h66
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c5699
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_debug.c216
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c3816
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c3737
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c768
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c4585
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c745
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c530
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c598
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c4576
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_sink.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stat.c90
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c1077
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c929
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_surface.c130
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h2681
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_bios_types.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_ddc_types.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c2314
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h330
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dp_types.h869
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dsc.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_fused_io.c148
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_fused_io.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hdmi_types.h133
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_helper.c182
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hw_types.h448
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h383
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_plane.h51
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_plane_priv.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.c231
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stat.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_state.h77
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_state_priv.h128
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream.h289
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream_priv.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_trace.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h743
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/Makefile103
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c)36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h526
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.c85
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.c)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.h)1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.c)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.h)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.c)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn302/dcn302_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_dccg.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn303/dcn303_dccg.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c878
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h241
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c407
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h211
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.c375
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.h125
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c2486
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h256
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c915
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h249
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/Makefile6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.h148
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_audio.c338
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_audio.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_aux.c351
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_aux.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c369
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c118
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c116
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c130
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_opp.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_opp.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c48
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c245
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c318
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c236
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c473
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/Makefile46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c142
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/Makefile6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c2959
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h95
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c225
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c163
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c271
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c127
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c432
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/Makefile7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h350
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c3756
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h211
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h706
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/Makefile35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h130
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h363
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h127
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c2592
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c486
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c4048
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/Makefile9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c208
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h71
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.c125
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.h86
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c74
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/Makefile31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c244
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c2281
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/Makefile56
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.h24
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c280
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c534
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c1019
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h674
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c357
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c2833
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.c200
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/Makefile40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn302/Makefile43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c1741
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/Makefile13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_afmt.c92
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_afmt.h126
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c119
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h111
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c200
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c92
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h163
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/Makefile117
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c)52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h)50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c)79
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h)83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c)50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c)122
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h)69
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c)1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c682
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.h290
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c506
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h355
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c328
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c493
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h206
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c190
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c391
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h188
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c520
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h332
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c322
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h134
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c856
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h240
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_cp_psp.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_helpers.h84
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_pp_smu.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services_types.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile116
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c)22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h (renamed from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c)360
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dc_features.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c159
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c2559
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h89
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c68
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c214
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c212
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c337
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c220
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c739
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c611
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c290
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c486
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c367
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c380
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c827
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c7228
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c1591
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h69
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c432
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c7343
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c1678
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c3613
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h80
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c3762
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c6350
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h1170
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c614
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c931
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c620
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c639
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h129
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c56
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h198
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c297
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h369
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c262
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c290
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h (renamed from drivers/gpu/drm/amd/display/dc/dsc/qp_tables.h)36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c260
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h90
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/Makefile140
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h94
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c10337
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h204
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h2032
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h79
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c798
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h74
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c929
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c516
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c466
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h135
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h372
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h191
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h526
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h215
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h744
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c661
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c13342
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h2341
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c788
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c785
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c198
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c706
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c2390
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c147
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c49
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c1170
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h189
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h1010
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c1174
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h157
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c911
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c311
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c1528
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c560
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h149
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c704
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h309
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c573
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/Makefile83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c)90
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c)80
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c)201
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c)112
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c)139
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.c324
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.h83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c)333
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h)32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c)98
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c239
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c150
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c429
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h740
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c242
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c1186
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/Makefile52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c1006
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c)162
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h)61
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c142
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c394
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h346
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dsc.h (renamed from drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dscc_types.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c332
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/Makefile46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c)40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h)30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c)97
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c57
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.h61
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/Makefile29
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce110/hw_factory_dce110.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_translate_dcn30.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_translate_dcn30.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_factory_dcn315.c260
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_factory_dcn315.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_translate_dcn315.c374
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_translate_dcn315.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c271
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_translate_dcn32.c349
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_translate_dcn32.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c264
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c335
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/diagnostics/hw_factory_diag.c62
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/diagnostics/hw_factory_diag.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/diagnostics/hw_translate_diag.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/diagnostics/hw_translate_diag.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c58
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hpd_regs.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c23
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/Makefile50
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c628
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h235
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c780
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h245
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.c89
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.h68
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/Makefile104
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c)42
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h518
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c)73
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h144
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.c107
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c)25
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h)22
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c)22
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c1096
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.h147
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c1062
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.h166
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c664
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c1271
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h206
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/Makefile97
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c)144
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h)185
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c)256
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h)87
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c156
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.h132
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c)33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c678
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h)20
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c134
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h253
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c246
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c244
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c1097
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h373
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/Makefile202
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h)227
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c3503
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h125
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.c160
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.c270
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c433
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.c55
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c4147
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h221
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c)21
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c3256
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h)38
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c)28
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c615
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c136
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c302
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c)30
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c1282
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h)21
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c)35
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c)32
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.c)34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.c64
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c717
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c158
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c603
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c165
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c1848
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h135
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c174
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c1728
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h112
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c184
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile28
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.c182
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c4045
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h213
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c177
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h2025
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h242
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/clock_source.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_status.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h418
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h131
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h90
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/abm.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/audio.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h97
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h203
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h121
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h289
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h139
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h164
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h49
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h126
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h80
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h166
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h973
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/opp.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/optc.h197
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h125
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h237
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/transform.h27
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/vpg.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h252
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h161
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h120
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_hwss.h105
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_service.h350
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/reg_helper.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h504
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h24
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/Makefile92
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c32
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c52
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c74
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c339
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c123
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c110
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c91
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c283
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c404
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c406
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c411
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c436
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c400
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c382
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c381
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c414
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.c90
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq_types.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/Makefile64
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c1019
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c353
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c199
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c176
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c228
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c233
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.c1644
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.c2674
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.c916
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_resource.c114
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_resource.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.c626
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c604
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h106
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c2599
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h117
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c171
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c448
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h112
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c548
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c210
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c1816
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h201
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.c267
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c491
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.h62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.c80
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c1046
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c553
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c250
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c1426
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h81
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.c240
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile54
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.c239
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.h211
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/Makefile72
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c)36
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c)87
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c)308
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h1110
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c1052
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h402
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c635
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h257
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/Makefile51
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c)79
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c)52
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h)12
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c66
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h69
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/Makefile114
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c)366
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h686
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c580
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h)25
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c198
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h71
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c438
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h)38
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c190
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c526
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h279
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c275
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h262
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c384
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h195
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c538
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h83
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c552
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h196
-rw-r--r--drivers/gpu/drm/amd/display/dc/os_types.h48
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/Makefile35
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c573
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.h195
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/Makefile225
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c)77
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c)43
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c)56
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c)49
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c)131
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c)119
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c)358
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c2782
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h)50
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c1318
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c1720
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h)11
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c2636
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h)23
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c)441
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c1526
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c1458
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c2273
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h106
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c2159
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c2181
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c2048
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c2931
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h1281
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c780
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c2078
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c2223
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h315
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c2196
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c2196
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c2282
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h656
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c304
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c27
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/Makefile33
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c1925
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h27
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.c553
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.c2586
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.c1233
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h560
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.c151
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.h29
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_debug.h30
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c493
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.h522
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_os_types.h56
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c56
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c7
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h572
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv_stat.h41
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h6849
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_trace_buffer.h3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/Makefile11
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c141
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h57
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c11
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h4
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c15
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn301.c5
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn302.c5
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c59
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.h38
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c496
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h258
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c67
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h35
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn315.c62
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn315.h68
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn316.c62
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn316.h33
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c560
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h273
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c611
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h290
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.c34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.h13
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.c34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.h13
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c678
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h290
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h1
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c1055
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c128
-rw-r--r--drivers/gpu/drm/amd/display/include/audio_types.h17
-rw-r--r--drivers/gpu/drm/amd/display/include/bios_parser_types.h28
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h66
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_types.h13
-rw-r--r--drivers/gpu/drm/amd/display/include/ddc_service_types.h14
-rw-r--r--drivers/gpu/drm/amd/display/include/dpcd_defs.h61
-rw-r--r--drivers/gpu/drm/amd/display/include/fixed31_32.h10
-rw-r--r--drivers/gpu/drm/amd/display/include/gpio_service_interface.h7
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h28
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_defs.h10
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_id.h18
-rw-r--r--drivers/gpu/drm/amd/display/include/hdcp_msg_types.h108
-rw-r--r--drivers/gpu/drm/amd/display/include/hdcp_types.h103
-rw-r--r--drivers/gpu/drm/amd/display/include/i2caux_interface.h79
-rw-r--r--drivers/gpu/drm/amd/display/include/link_service_types.h123
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_interface.h9
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_types.h141
-rw-r--r--drivers/gpu/drm/amd/display/include/set_mode_types.h8
-rw-r--r--drivers/gpu/drm/amd/display/include/signal_types.h53
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c600
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h18
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c483
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c115
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h24
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c51
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c14
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c101
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c71
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c99
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c194
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h90
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c209
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h48
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h71
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h212
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h40
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_stats.h6
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c121
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.c362
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.h37
-rw-r--r--drivers/gpu/drm/amd/display/modules/vmid/vmid.c2
-rw-r--r--drivers/gpu/drm/amd/include/aldebaran_ip_offset.h1738
-rw-r--r--drivers/gpu/drm/amd/include/amd_acpi.h22
-rw-r--r--drivers/gpu/drm/amd/include/amd_cper.h269
-rw-r--r--drivers/gpu/drm/amd/include/amd_pcie.h18
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h310
-rw-r--r--drivers/gpu/drm/amd/include/amdgpu_reg_state.h153
-rw-r--r--drivers/gpu/drm/amd/include/arct_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_1_8_0_offset.h411
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_1_8_0_sh_mask.h1807
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_3_0_0_offset.h259
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_3_0_0_sh_mask.h1246
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_offset.h287
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_sh_mask.h1348
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_1_offset.h32
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_1_sh_mask.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_d.h1
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h7
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h6193
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h22091
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h144
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h78
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_offset.h114
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h59
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_offset.h8471
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h35366
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h15089
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h60782
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_4_offset.h15245
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_4_sh_mask.h61832
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_offset.h15195
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_sh_mask.h62071
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_offset.h15686
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_sh_mask.h62727
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_offset.h14737
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_sh_mask.h222948
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_1_offset.h14596
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_1_sh_mask.h56598
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_offset.h15279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h53485
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_1_offset.h15259
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_1_sh_mask.h53464
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_6_0_offset.h15485
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_6_0_sh_mask.h61940
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_offset.h16662
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h145870
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h8
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h133
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_3_offset.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_3_sh_mask.h157
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_3_offset.h151
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_3_sh_mask.h952
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_0_offset.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_3_0_0_offset.h)31
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_0_sh_mask.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_3_0_0_sh_mask.h)11
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_3_offset.h204
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_3_sh_mask.h1194
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_1_4_offset.h7215
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_1_4_sh_mask.h55194
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_offset.h11973
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_sh_mask.h103385
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_2_offset.h11957
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_2_sh_mask.h103633
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_3_offset.h11969
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_3_sh_mask.h136141
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h123
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h128
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h1536
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_default.h6114
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h11685
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_sh_mask.h41664
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_3_offset.h12094
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_3_sh_mask.h44690
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h10002
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h36579
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_12_0_0_offset.h11061
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_12_0_0_sh_mask.h40550
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h16
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_offset.h7687
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_sh_mask.h33003
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_offset.h7450
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h31649
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_4_2_offset.h219
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_4_2_sh_mask.h663
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_5_2_1_offset.h217
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_5_2_1_sh_mask.h684
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_6_0_0_offset.h209
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_6_0_0_sh_mask.h646
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_7_0_0_offset.h219
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_7_0_0_sh_mask.h735
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_6_0_0_offset.h391
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_6_0_0_sh_mask.h1439
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_7_0_0_offset.h388
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_7_0_0_sh_mask.h1411
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h23
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_7_offset.h5125
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_7_sh_mask.h32178
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_8_0_offset.h3366
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_8_0_sh_mask.h22628
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_0_offset.h1529
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_0_sh_mask.h7478
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_1_offset.h1769
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_1_sh_mask.h7483
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_2_offset.h1425
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_2_sh_mask.h7228
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_3_0_offset.h1395
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_3_0_sh_mask.h6722
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_4_1_0_offset.h1341
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_4_1_0_sh_mask.h6943
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_offset.h36
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h56
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h352
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_sh_mask.h355
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h7
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h12
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_0_offset.h461
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_0_sh_mask.h682
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_offset.h409
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h631
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_4_offset.h402
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_4_sh_mask.h595
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_5_offset.h455
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_5_sh_mask.h672
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_6_offset.h456
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_6_sh_mask.h702
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_8_offset.h410
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_8_sh_mask.h603
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_0_offset.h359
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_0_sh_mask.h534
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_2_offset.h468
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_2_sh_mask.h692
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_3_1_offset.h11287
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_3_1_sh_mask.h32806
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_4_3_0_offset.h17381
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_4_3_0_sh_mask.h82050
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h9406
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h57899
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_offset.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_sh_mask.h12
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h29660
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h154426
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_9_0_offset.h10004
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_9_0_sh_mask.h38900
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h23
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h41
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h11
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_4_2_offset.h263
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_4_2_sh_mask.h995
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_0_0_offset.h267
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_0_0_sh_mask.h979
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_1_0_offset.h279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_1_0_sh_mask.h1019
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_7_0_0_offset.h279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_7_0_0_sh_mask.h1029
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pcie/pcie_6_1_0_offset.h630
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pcie/pcie_6_1_0_sh_mask.h4250
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_0_offset.h5224
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_0_sh_mask.h13922
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h1113
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_sh_mask.h3300
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h44
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h188
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h102
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h184
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_2_offset.h516
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_2_sh_mask.h1163
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_3_offset.h177
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_3_sh_mask.h428
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_6_offset.h517
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_6_sh_mask.h1178
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_14_0_2_offset.h511
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_14_0_2_sh_mask.h1106
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_sh_mask.h3
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_13_0_2_offset.h346
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_13_0_2_sh_mask.h1297
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_14_0_2_offset.h228
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_14_0_2_sh_mask.h940
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_12_0_0_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_12_0_0_sh_mask.h95
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_7_0_offset.h2626
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_7_0_sh_mask.h10796
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h35
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h97
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h29
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_7_0_sh_mask.h179
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_1_0_d.h5
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_1_0_sh_mask.h10
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h14
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h51
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_6_0_offset.h1462
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_6_0_sh_mask.h4535
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_0_offset.h2032
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_0_sh_mask.h8937
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_offset.h2367
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_sh_mask.h10919
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_5_offset.h1797
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_5_sh_mask.h8614
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h1694
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h7666
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vpe/vpe_6_1_0_offset.h1553
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vpe/vpe_6_1_0_sh_mask.h4393
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/xgmi/xgmi_6_1_0_sh_mask.h87
-rw-r--r--drivers/gpu/drm/amd/include/atom-bits.h2
-rw-r--r--drivers/gpu/drm/amd/include/atombios.h62
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h904
-rw-r--r--drivers/gpu/drm/amd/include/beige_goby_ip_offset.h1272
-rw-r--r--drivers/gpu/drm/amd/include/cgs_common.h23
-rw-r--r--drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h712
-rw-r--r--drivers/gpu/drm/amd/include/dimgrey_cavefish_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/discovery.h301
-rw-r--r--drivers/gpu/drm/amd/include/dm_pp_interface.h10
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/dcn/irqsrcs_dcn_1_0.h4
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h80
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h74
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/isp/irqsrcs_isp_4_1.h62
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_2_0.h4
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h44
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h47
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vpe/irqsrcs_vpe_6_1.h40
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h149
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h1410
-rw-r--r--drivers/gpu/drm/amd/include/mes_v11_api_def.h709
-rw-r--r--drivers/gpu/drm/amd/include/mes_v12_api_def.h907
-rw-r--r--drivers/gpu/drm/amd/include/navi10_enum.h2
-rw-r--r--drivers/gpu/drm/amd/include/navi12_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/navi14_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/pptable.h95
-rw-r--r--drivers/gpu/drm/amd/include/renoir_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/sienna_cichlid_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/soc15_hw_ip.h4
-rw-r--r--drivers/gpu/drm/amd/include/soc15_ih_clientid.h38
-rw-r--r--drivers/gpu/drm/amd/include/soc21_enum.h22477
-rw-r--r--drivers/gpu/drm/amd/include/soc24_enum.h21073
-rw-r--r--drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h446
-rw-r--r--drivers/gpu/drm/amd/include/v10_structs.h3
-rw-r--r--drivers/gpu/drm/amd/include/v11_structs.h1189
-rw-r--r--drivers/gpu/drm/amd/include/v12_structs.h1189
-rw-r--r--drivers/gpu/drm/amd/include/v9_structs.h30
-rw-r--r--drivers/gpu/drm/amd/include/vangogh_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/vega10_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/vega20_ip_offset.h78
-rw-r--r--drivers/gpu/drm/amd/include/vi_structs.h32
-rw-r--r--drivers/gpu/drm/amd/include/yellow_carp_offset.h1365
-rw-r--r--drivers/gpu/drm/amd/pm/Makefile15
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm.c3157
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c95
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c4496
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h462
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h28
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h47
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h1356
-rw-r--r--drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h1268
-rw-r--r--drivers/gpu/drm/amd/pm/inc/smu9_driver_if.h486
-rw-r--r--drivers/gpu/drm/amd/pm/inc/smu_types.h347
-rw-r--r--drivers/gpu/drm/amd/pm/inc/vega12/smu9_driver_if.h767
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/Makefile32
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/cik_dpm.h (renamed from drivers/gpu/drm/amd/pm/powerplay/cik_dpm.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c (renamed from drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c)265
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.h (renamed from drivers/gpu/drm/amd/pm/powerplay/kv_dpm.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_smc.c (renamed from drivers/gpu/drm/amd/pm/powerplay/kv_smc.c)0
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c1011
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.h37
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/ppsmc.h (renamed from drivers/gpu/drm/amd/pm/powerplay/ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/r600_dpm.h (renamed from drivers/gpu/drm/amd/pm/powerplay/r600_dpm.h)3
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c (renamed from drivers/gpu/drm/amd/pm/powerplay/si_dpm.c)990
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.h465
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c (renamed from drivers/gpu/drm/amd/pm/powerplay/si_smc.c)66
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/sislands_smc.h (renamed from drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h)107
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/Makefile4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c654
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ci_baco.c21
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/common_baco.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/fiji_baco.c24
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c17
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/hwmgr.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/polaris_baco.c30
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c23
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c483
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.h8
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.c82
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.h10
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h555
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h39
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c160
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.h1
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c351
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_powertune.c27
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c33
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.h4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c70
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c31
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h15
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/tonga_baco.c30
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_baco.c26
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c594
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c130
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h48
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_processpptables.c7
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c43
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.h4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_baco.c12
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c167
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.h3
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_pptable.h11
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_processpptables.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_thermal.c7
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c14
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c220
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.h6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_pptable.h19
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_processpptables.c575
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c36
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.h4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/amd_powerplay.h (renamed from drivers/gpu/drm/amd/pm/inc/amd_powerplay.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/cz_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/cz_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/fiji_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/fiji_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/hardwaremanager.h (renamed from drivers/gpu/drm/amd/pm/inc/hardwaremanager.h)6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h (renamed from drivers/gpu/drm/amd/pm/inc/hwmgr.h)51
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/polaris10_pwrvirus.h (renamed from drivers/gpu/drm/amd/pm/inc/polaris10_pwrvirus.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h (renamed from drivers/gpu/drm/amd/pm/inc/power_state.h)1
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_debug.h (renamed from drivers/gpu/drm/amd/pm/inc/pp_debug.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_endian.h (renamed from drivers/gpu/drm/amd/pm/inc/pp_endian.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_thermal.h (renamed from drivers/gpu/drm/amd/pm/inc/pp_thermal.h)6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/ppinterrupt.h (renamed from drivers/gpu/drm/amd/pm/inc/ppinterrupt.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/rv_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/rv_ppsmc.h)1
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu10.h (renamed from drivers/gpu/drm/amd/pm/inc/smu10.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu10_driver_if.h (renamed from drivers/gpu/drm/amd/pm/inc/smu10_driver_if.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7.h)6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu71.h (renamed from drivers/gpu/drm/amd/pm/inc/smu71.h)22
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu71_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu71_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu72.h (renamed from drivers/gpu/drm/amd/pm/inc/smu72.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu72_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu72_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu73.h (renamed from drivers/gpu/drm/amd/pm/inc/smu73.h)45
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu73_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu73_discrete.h)73
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu74.h (renamed from drivers/gpu/drm/amd/pm/inc/smu74.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu74_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu74_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu75.h (renamed from drivers/gpu/drm/amd/pm/inc/smu75.h)12
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu75_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu75_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_common.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7_common.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_fusion.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7_fusion.h)42
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu8.h (renamed from drivers/gpu/drm/amd/pm/inc/smu8.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu8_fusion.h (renamed from drivers/gpu/drm/amd/pm/inc/smu8_fusion.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu9.h (renamed from drivers/gpu/drm/amd/pm/inc/smu9.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h488
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu_ucode_xfer_cz.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_ucode_xfer_cz.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu_ucode_xfer_vi.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_ucode_xfer_vi.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smumgr.h (renamed from drivers/gpu/drm/amd/pm/inc/smumgr.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/tonga_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/tonga_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega10_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/vega10_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h769
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega12_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/vega12_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega20_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/vega20_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/si_dpm.h1015
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c10
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c9
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c12
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c11
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu10_smumgr.c23
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c49
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu9_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c9
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c10
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c28
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega20_smumgr.c16
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vegam_smumgr.c7
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/Makefile2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c3060
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h1812
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h131
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/arcturus_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/arcturus_ppsmc.h)7
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if_arcturus.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_cyan_skillfish.h79
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if_navi10.h)100
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h1835
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_vangogh.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h)44
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu12_driver_if.h (renamed from drivers/gpu/drm/amd/pm/inc/smu12_driver_if.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h557
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h1632
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h282
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h139
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h243
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h1622
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_yellow_carp.h222
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h1889
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h263
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0_7_ppsmc.h)4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_pmfw.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_5_pmfw.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h)5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_8_pmfw.h152
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_8_ppsmc.h77
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v12_0_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v12_0_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h150
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h378
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h146
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_1_pmfw.h141
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_1_ppsmc.h97
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_pmfw.h137
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_ppsmc.h137
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_5_pmfw.h126
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_5_ppsmc.h74
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h471
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h123
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h140
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h194
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h141
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h150
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_11_0_cdr_table.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_11_0_cdr_table.h)6
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h488
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0.h)70
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_7_pptable.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0_7_pptable.h)9
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_pptable.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0_pptable.h)24
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v12_0.h)4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h302
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h198
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_7_pptable.h198
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_pptable.h169
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h249
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h204
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c1566
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c606
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h29
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c1776
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c2655
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c552
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c1149
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c406
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c148
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile31
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c2158
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.h72
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c2510
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c3302
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c1073
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c1148
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c1140
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.h29
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c4086
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h265
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c2888
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c1367
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/Makefile30
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c1977
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c1723
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c2932
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c867
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h192
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_internal.h24
-rw-r--r--drivers/gpu/drm/amd/ras/Makefile34
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/Makefile33
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c285
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h54
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.c182
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.h27
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c648
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h83
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c94
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.h30
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.c125
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.h30
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c190
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.h41
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c279
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h110
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/Makefile44
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras.h370
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca.c672
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca.h164
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c379
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.h71
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cmd.c522
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cmd.h426
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core.c603
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core_status.h37
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cper.c315
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cper.h304
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c1339
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_eeprom.h197
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx.c70
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx.h43
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.c426
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.h259
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c317
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h93
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1.c81
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1.h50
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.c105
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.h30
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio.c96
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio.h46
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.c123
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.h31
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_process.c322
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_process.h53
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp.c750
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp.h145
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.c46
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.h31
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_ta_if.h231
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc.c707
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc.h166
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c511
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.h314
-rw-r--r--drivers/gpu/drm/arc/Kconfig10
-rw-r--r--drivers/gpu/drm/arc/Makefile3
-rw-r--r--drivers/gpu/drm/arc/arcpgu.h37
-rw-r--r--drivers/gpu/drm/arc/arcpgu_crtc.c217
-rw-r--r--drivers/gpu/drm/arc/arcpgu_drv.c224
-rw-r--r--drivers/gpu/drm/arc/arcpgu_hdmi.c48
-rw-r--r--drivers/gpu/drm/arc/arcpgu_regs.h31
-rw-r--r--drivers/gpu/drm/arc/arcpgu_sim.c108
-rw-r--r--drivers/gpu/drm/arm/Kconfig9
-rw-r--r--drivers/gpu/drm/arm/display/Kconfig4
-rw-r--r--drivers/gpu/drm/arm/display/include/malidp_utils.h5
-rw-r--r--drivers/gpu/drm/arm/display/komeda/Makefile4
-rw-r--r--drivers/gpu/drm/arm/display/komeda/d71/d71_component.c14
-rw-r--r--drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c6
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c5
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_crtc.c86
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_dev.c16
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_drv.c82
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h11
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c20
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h1
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.c73
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.h9
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c23
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c56
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_plane.c46
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c11
-rw-r--r--drivers/gpu/drm/arm/hdlcd_crtc.c85
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.c246
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.h3
-rw-r--r--drivers/gpu/drm/arm/malidp_crtc.c15
-rw-r--r--drivers/gpu/drm/arm/malidp_drv.c124
-rw-r--r--drivers/gpu/drm/arm/malidp_drv.h2
-rw-r--r--drivers/gpu/drm/arm/malidp_hw.c10
-rw-r--r--drivers/gpu/drm/arm/malidp_mw.c35
-rw-r--r--drivers/gpu/drm/arm/malidp_planes.c175
-rw-r--r--drivers/gpu/drm/arm/malidp_regs.h2
-rw-r--r--drivers/gpu/drm/armada/Kconfig2
-rw-r--r--drivers/gpu/drm/armada/Makefile3
-rw-r--r--drivers/gpu/drm/armada/armada_510.c1
-rw-r--r--drivers/gpu/drm/armada/armada_crtc.c31
-rw-r--r--drivers/gpu/drm/armada/armada_debugfs.c2
-rw-r--r--drivers/gpu/drm/armada/armada_drm.h13
-rw-r--r--drivers/gpu/drm/armada/armada_drv.c58
-rw-r--r--drivers/gpu/drm/armada/armada_fb.c14
-rw-r--r--drivers/gpu/drm/armada/armada_fb.h6
-rw-r--r--drivers/gpu/drm/armada/armada_fbdev.c112
-rw-r--r--drivers/gpu/drm/armada/armada_gem.c24
-rw-r--r--drivers/gpu/drm/armada/armada_overlay.c130
-rw-r--r--drivers/gpu/drm/armada/armada_plane.c148
-rw-r--r--drivers/gpu/drm/armada/armada_plane.h4
-rw-r--r--drivers/gpu/drm/aspeed/Kconfig3
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx.h9
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c26
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_drv.c117
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_out.c2
-rw-r--r--drivers/gpu/drm/ast/Kconfig9
-rw-r--r--drivers/gpu/drm/ast/Makefile21
-rw-r--r--drivers/gpu/drm/ast/ast_2000.c257
-rw-r--r--drivers/gpu/drm/ast/ast_2100.c480
-rw-r--r--drivers/gpu/drm/ast/ast_2200.c92
-rw-r--r--drivers/gpu/drm/ast/ast_2300.c1463
-rw-r--r--drivers/gpu/drm/ast/ast_2400.c100
-rw-r--r--drivers/gpu/drm/ast/ast_2500.c675
-rw-r--r--drivers/gpu/drm/ast/ast_2600.c116
-rw-r--r--drivers/gpu/drm/ast/ast_cursor.c414
-rw-r--r--drivers/gpu/drm/ast/ast_ddc.c187
-rw-r--r--drivers/gpu/drm/ast/ast_ddc.h11
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c577
-rw-r--r--drivers/gpu/drm/ast/ast_dp501.c375
-rw-r--r--drivers/gpu/drm/ast/ast_dram_tables.h207
-rw-r--r--drivers/gpu/drm/ast/ast_drv.c398
-rw-r--r--drivers/gpu/drm/ast/ast_drv.h575
-rw-r--r--drivers/gpu/drm/ast/ast_main.c462
-rw-r--r--drivers/gpu/drm/ast/ast_mm.c67
-rw-r--r--drivers/gpu/drm/ast/ast_mode.c1336
-rw-r--r--drivers/gpu/drm/ast/ast_post.c2039
-rw-r--r--drivers/gpu/drm/ast/ast_post.h50
-rw-r--r--drivers/gpu/drm/ast/ast_reg.h78
-rw-r--r--drivers/gpu/drm/ast/ast_sil164.c126
-rw-r--r--drivers/gpu/drm/ast/ast_tables.h242
-rw-r--r--drivers/gpu/drm/ast/ast_vbios.c241
-rw-r--r--drivers/gpu/drm/ast/ast_vbios.h108
-rw-r--r--drivers/gpu/drm/ast/ast_vga.c126
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/Kconfig6
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c216
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c302
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h140
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c4
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c546
-rw-r--r--drivers/gpu/drm/bochs/Kconfig11
-rw-r--r--drivers/gpu/drm/bochs/Makefile4
-rw-r--r--drivers/gpu/drm/bochs/bochs.h97
-rw-r--r--drivers/gpu/drm/bochs/bochs_drv.c204
-rw-r--r--drivers/gpu/drm/bochs/bochs_hw.c300
-rw-r--r--drivers/gpu/drm/bochs/bochs_kms.c170
-rw-r--r--drivers/gpu/drm/bochs/bochs_mm.c24
-rw-r--r--drivers/gpu/drm/bridge/Kconfig233
-rw-r--r--drivers/gpu/drm/bridge/Makefile23
-rw-r--r--drivers/gpu/drm/bridge/adv7511/Kconfig5
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511.h100
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_audio.c90
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_cec.c210
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_drv.c708
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7533.c76
-rw-r--r--drivers/gpu/drm/bridge/analogix/Kconfig11
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-anx6345.c103
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c122
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.c4
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_core.c667
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_core.h37
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c223
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h9
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.c1646
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.h128
-rw-r--r--drivers/gpu/drm/bridge/aux-bridge.c150
-rw-r--r--drivers/gpu/drm/bridge/aux-hpd-bridge.c211
-rw-r--r--drivers/gpu/drm/bridge/cadence/Kconfig26
-rw-r--r--drivers/gpu/drm/bridge/cadence/Makefile5
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c1410
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-core.h82
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-j721e.c51
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-j721e.h16
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c423
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.h26
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c543
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h89
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-j721e.c9
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-j721e.h2
-rw-r--r--drivers/gpu/drm/bridge/cdns-dsi.c1305
-rw-r--r--drivers/gpu/drm/bridge/chipone-icn6211.c824
-rw-r--r--drivers/gpu/drm/bridge/chrontel-ch7033.c39
-rw-r--r--drivers/gpu/drm/bridge/cros-ec-anx7688.c189
-rw-r--r--drivers/gpu/drm/bridge/display-connector.c204
-rw-r--r--drivers/gpu/drm/bridge/fsl-ldb.c405
-rw-r--r--drivers/gpu/drm/bridge/imx/Kconfig102
-rw-r--r--drivers/gpu/drm/bridge/imx/Makefile11
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-ldb-helper.c230
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-ldb-helper.h95
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-legacy-bridge.c91
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pai.c158
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pvi.c207
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-tx.c213
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qm-ldb.c591
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-ldb.c721
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c443
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pixel-link.c422
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c480
-rw-r--r--drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c915
-rw-r--r--drivers/gpu/drm/bridge/ite-it6263.c930
-rw-r--r--drivers/gpu/drm/bridge/ite-it6505.c3683
-rw-r--r--drivers/gpu/drm/bridge/ite-it66121.c1653
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt8912b.c838
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9211.c799
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9611.c683
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9611uxc.c240
-rw-r--r--drivers/gpu/drm/bridge/lvds-codec.c95
-rw-r--r--drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c170
-rw-r--r--drivers/gpu/drm/bridge/microchip-lvds.c229
-rw-r--r--drivers/gpu/drm/bridge/nwl-dsi.c225
-rw-r--r--drivers/gpu/drm/bridge/nwl-dsi.h4
-rw-r--r--drivers/gpu/drm/bridge/nxp-ptn3460.c66
-rw-r--r--drivers/gpu/drm/bridge/panel.c287
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8622.c34
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8640.c687
-rw-r--r--drivers/gpu/drm/bridge/samsung-dsim.c2321
-rw-r--r--drivers/gpu/drm/bridge/sii902x.c336
-rw-r--r--drivers/gpu/drm/bridge/sii9234.c25
-rw-r--r--drivers/gpu/drm/bridge/sil-sii8620.c31
-rw-r--r--drivers/gpu/drm/bridge/simple-bridge.c84
-rw-r--r--drivers/gpu/drm/bridge/ssd2825.c775
-rw-r--r--drivers/gpu/drm/bridge/synopsys/Kconfig41
-rw-r--r--drivers/gpu/drm/bridge/synopsys/Makefile5
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-dp.c2097
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c52
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h4
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c51
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-gp-audio.c200
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c21
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c1343
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h848
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c508
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.h16
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c205
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c1032
-rw-r--r--drivers/gpu/drm/bridge/tc358762.c111
-rw-r--r--drivers/gpu/drm/bridge/tc358764.c134
-rw-r--r--drivers/gpu/drm/bridge/tc358767.c1275
-rw-r--r--drivers/gpu/drm/bridge/tc358768.c570
-rw-r--r--drivers/gpu/drm/bridge/tc358775.c249
-rw-r--r--drivers/gpu/drm/bridge/tda998x_drv.c (renamed from drivers/gpu/drm/i2c/tda998x_drv.c)101
-rw-r--r--drivers/gpu/drm/bridge/thc63lvd1024.c38
-rw-r--r--drivers/gpu/drm/bridge/ti-dlpc3433.c417
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi83.c1038
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi86.c1883
-rw-r--r--drivers/gpu/drm/bridge/ti-tdp158.c115
-rw-r--r--drivers/gpu/drm/bridge/ti-tfp410.c104
-rw-r--r--drivers/gpu/drm/bridge/ti-tpd12s015.c25
-rw-r--r--drivers/gpu/drm/bridge/waveshare-dsi.c203
-rw-r--r--drivers/gpu/drm/ci/arm.config70
-rw-r--r--drivers/gpu/drm/ci/arm64.config210
-rw-r--r--drivers/gpu/drm/ci/build-igt.sh74
-rw-r--r--drivers/gpu/drm/ci/build.sh161
-rw-r--r--drivers/gpu/drm/ci/build.yml226
-rw-r--r--drivers/gpu/drm/ci/check-devicetrees.yml50
-rwxr-xr-xdrivers/gpu/drm/ci/check-patch.py57
-rw-r--r--drivers/gpu/drm/ci/container.yml77
-rwxr-xr-xdrivers/gpu/drm/ci/dt-binding-check.sh19
-rwxr-xr-xdrivers/gpu/drm/ci/dtbs-check.sh22
-rw-r--r--drivers/gpu/drm/ci/gitlab-ci.yml349
-rwxr-xr-xdrivers/gpu/drm/ci/igt_runner.sh101
-rw-r--r--drivers/gpu/drm/ci/image-tags.yml18
-rwxr-xr-xdrivers/gpu/drm/ci/kunit.sh16
-rw-r--r--drivers/gpu/drm/ci/kunit.yml37
-rwxr-xr-xdrivers/gpu/drm/ci/lava-submit.sh105
-rwxr-xr-xdrivers/gpu/drm/ci/setup-llvm-links.sh13
-rw-r--r--drivers/gpu/drm/ci/static-checks.yml12
-rw-r--r--drivers/gpu/drm/ci/test.yml471
-rw-r--r--drivers/gpu/drm/ci/x86_64.config112
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-fails.txt40
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt40
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-fails.txt48
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-flakes.txt69
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-fails.txt44
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-flakes.txt13
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-skips.txt31
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-fails.txt74
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-fails.txt47
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-flakes.txt20
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-skips.txt328
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-fails.txt46
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-skips.txt21
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-fails.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt152
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-tgl-fails.txt30
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt51
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-fails.txt52
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-flakes.txt13
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt44
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-flakes.txt55
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-skips.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-fails.txt38
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-flakes.txt41
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-skips.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/meson-g12b-fails.txt12
-rw-r--r--drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt18
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt5
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-skips.txt28
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-fails.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-flakes.txt41
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-skips.txt39
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-fails.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-flakes.txt20
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-skips.txt30
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-fails.txt10
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-skips.txt212
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-g12b-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-g12b-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-mt8183-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-mt8183-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3288-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3288-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-fails.txt9
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-skips.txt21
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-fails.txt74
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-flakes.txt146
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-skips.txt26
-rw-r--r--drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt190
-rw-r--r--drivers/gpu/drm/ci/xfails/virtio_gpu-none-skips.txt31
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-fails.txt25
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-flakes.txt118
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-skips.txt811
-rw-r--r--drivers/gpu/drm/clients/Kconfig123
-rw-r--r--drivers/gpu/drm/clients/Makefile8
-rw-r--r--drivers/gpu/drm/clients/drm_client_internal.h25
-rw-r--r--drivers/gpu/drm/clients/drm_client_setup.c98
-rw-r--r--drivers/gpu/drm/clients/drm_fbdev_client.c176
-rw-r--r--drivers/gpu/drm/clients/drm_log.c441
-rw-r--r--drivers/gpu/drm/display/Kconfig108
-rw-r--r--drivers/gpu/drm/display/Makefile31
-rw-r--r--drivers/gpu/drm/display/drm_bridge_connector.c887
-rw-r--r--drivers/gpu/drm/display/drm_display_helper_mod.c22
-rw-r--r--drivers/gpu/drm/display/drm_dp_aux_bus.c392
-rw-r--r--drivers/gpu/drm/display/drm_dp_aux_dev.c (renamed from drivers/gpu/drm/drm_dp_aux_dev.c)14
-rw-r--r--drivers/gpu/drm/display/drm_dp_cec.c (renamed from drivers/gpu/drm/drm_dp_cec.c)79
-rw-r--r--drivers/gpu/drm/display/drm_dp_dual_mode_helper.c (renamed from drivers/gpu/drm/drm_dp_dual_mode_helper.c)133
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper.c4840
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper_internal.h33
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology.c6192
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology_internal.h (renamed from drivers/gpu/drm/drm_dp_mst_topology_internal.h)4
-rw-r--r--drivers/gpu/drm/display/drm_dp_tunnel.c1951
-rw-r--r--drivers/gpu/drm/display/drm_dsc_helper.c1566
-rw-r--r--drivers/gpu/drm/display/drm_hdcp_helper.c421
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_audio_helper.c195
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_cec_helper.c193
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_cec_notifier_helper.c65
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_helper.c427
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_state_helper.c1180
-rw-r--r--drivers/gpu/drm/display/drm_scdc_helper.c278
-rw-r--r--drivers/gpu/drm/drm_agpsupport.c446
-rw-r--r--drivers/gpu/drm/drm_atomic.c712
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c1228
-rw-r--r--drivers/gpu/drm/drm_atomic_state_helper.c176
-rw-r--r--drivers/gpu/drm/drm_atomic_uapi.c450
-rw-r--r--drivers/gpu/drm/drm_auth.c192
-rw-r--r--drivers/gpu/drm/drm_blend.c27
-rw-r--r--drivers/gpu/drm/drm_bridge.c785
-rw-r--r--drivers/gpu/drm/drm_bridge_connector.c380
-rw-r--r--drivers/gpu/drm/drm_bridge_helper.c60
-rw-r--r--drivers/gpu/drm/drm_buddy.c1326
-rw-r--r--drivers/gpu/drm/drm_bufs.c1609
-rw-r--r--drivers/gpu/drm/drm_cache.c174
-rw-r--r--drivers/gpu/drm/drm_client.c397
-rw-r--r--drivers/gpu/drm/drm_client_event.c214
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c431
-rw-r--r--drivers/gpu/drm/drm_client_sysrq.c65
-rw-r--r--drivers/gpu/drm/drm_color_mgmt.c297
-rw-r--r--drivers/gpu/drm/drm_colorop.c599
-rw-r--r--drivers/gpu/drm/drm_connector.c1624
-rw-r--r--drivers/gpu/drm/drm_context.c524
-rw-r--r--drivers/gpu/drm/drm_crtc.c230
-rw-r--r--drivers/gpu/drm/drm_crtc_helper.c152
-rw-r--r--drivers/gpu/drm/drm_crtc_helper_internal.h42
-rw-r--r--drivers/gpu/drm/drm_crtc_internal.h65
-rw-r--r--drivers/gpu/drm/drm_damage_helper.c77
-rw-r--r--drivers/gpu/drm/drm_debugfs.c622
-rw-r--r--drivers/gpu/drm/drm_debugfs_crc.c9
-rw-r--r--drivers/gpu/drm/drm_displayid.c204
-rw-r--r--drivers/gpu/drm/drm_displayid_internal.h185
-rw-r--r--drivers/gpu/drm/drm_dma.c172
-rw-r--r--drivers/gpu/drm/drm_dp_helper.c3084
-rw-r--r--drivers/gpu/drm/drm_dp_mst_topology.c5874
-rw-r--r--drivers/gpu/drm/drm_draw.c157
-rw-r--r--drivers/gpu/drm/drm_draw_internal.h56
-rw-r--r--drivers/gpu/drm/drm_drv.c467
-rw-r--r--drivers/gpu/drm/drm_dsc.c408
-rw-r--r--drivers/gpu/drm/drm_dumb_buffers.c176
-rw-r--r--drivers/gpu/drm/drm_edid.c4821
-rw-r--r--drivers/gpu/drm/drm_edid_load.c272
-rw-r--r--drivers/gpu/drm/drm_eld.c57
-rw-r--r--drivers/gpu/drm/drm_encoder.c81
-rw-r--r--drivers/gpu/drm/drm_encoder_slave.c182
-rw-r--r--drivers/gpu/drm/drm_exec.c341
-rw-r--r--drivers/gpu/drm/drm_fb_cma_helper.c99
-rw-r--r--drivers/gpu/drm/drm_fb_dma_helper.c197
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c1554
-rw-r--r--drivers/gpu/drm/drm_fbdev_dma.c323
-rw-r--r--drivers/gpu/drm/drm_fbdev_shmem.c203
-rw-r--r--drivers/gpu/drm/drm_fbdev_ttm.c234
-rw-r--r--drivers/gpu/drm/drm_file.c549
-rw-r--r--drivers/gpu/drm/drm_flip_work.c28
-rw-r--r--drivers/gpu/drm/drm_format_helper.c1472
-rw-r--r--drivers/gpu/drm/drm_format_internal.h174
-rw-r--r--drivers/gpu/drm/drm_fourcc.c175
-rw-r--r--drivers/gpu/drm/drm_framebuffer.c326
-rw-r--r--drivers/gpu/drm/drm_gem.c725
-rw-r--r--drivers/gpu/drm/drm_gem_atomic_helper.c478
-rw-r--r--drivers/gpu/drm/drm_gem_cma_helper.c558
-rw-r--r--drivers/gpu/drm/drm_gem_dma_helper.c608
-rw-r--r--drivers/gpu/drm/drm_gem_framebuffer_helper.c329
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c765
-rw-r--r--drivers/gpu/drm/drm_gem_ttm_helper.c47
-rw-r--r--drivers/gpu/drm/drm_gem_vram_helper.c484
-rw-r--r--drivers/gpu/drm/drm_gpusvm.c1635
-rw-r--r--drivers/gpu/drm/drm_gpuvm.c3207
-rw-r--r--drivers/gpu/drm/drm_hashtab.c209
-rw-r--r--drivers/gpu/drm/drm_hdcp.c423
-rw-r--r--drivers/gpu/drm/drm_internal.h111
-rw-r--r--drivers/gpu/drm/drm_ioc32.c665
-rw-r--r--drivers/gpu/drm/drm_ioctl.c229
-rw-r--r--drivers/gpu/drm/drm_irq.c292
-rw-r--r--drivers/gpu/drm/drm_kms_helper_common.c56
-rw-r--r--drivers/gpu/drm/drm_lease.c353
-rw-r--r--drivers/gpu/drm/drm_legacy.h214
-rw-r--r--drivers/gpu/drm/drm_legacy_misc.c107
-rw-r--r--drivers/gpu/drm/drm_lock.c373
-rw-r--r--drivers/gpu/drm/drm_managed.c65
-rw-r--r--drivers/gpu/drm/drm_memory.c139
-rw-r--r--drivers/gpu/drm/drm_mipi_dbi.c389
-rw-r--r--drivers/gpu/drm/drm_mipi_dsi.c936
-rw-r--r--drivers/gpu/drm/drm_mm.c51
-rw-r--r--drivers/gpu/drm/drm_mode_config.c48
-rw-r--r--drivers/gpu/drm/drm_mode_object.c81
-rw-r--r--drivers/gpu/drm/drm_modes.c819
-rw-r--r--drivers/gpu/drm/drm_modeset_helper.c106
-rw-r--r--drivers/gpu/drm/drm_modeset_lock.c62
-rw-r--r--drivers/gpu/drm/drm_of.c254
-rw-r--r--drivers/gpu/drm/drm_pagemap.c882
-rw-r--r--drivers/gpu/drm/drm_panel.c462
-rw-r--r--drivers/gpu/drm/drm_panel_backlight_quirks.c156
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c336
-rw-r--r--drivers/gpu/drm/drm_panic.c1037
-rw-r--r--drivers/gpu/drm/drm_panic_qr.rs1016
-rw-r--r--drivers/gpu/drm/drm_pci.c268
-rw-r--r--drivers/gpu/drm/drm_plane.c640
-rw-r--r--drivers/gpu/drm/drm_plane_helper.c101
-rw-r--r--drivers/gpu/drm/drm_prime.c267
-rw-r--r--drivers/gpu/drm/drm_print.c176
-rw-r--r--drivers/gpu/drm/drm_privacy_screen.c472
-rw-r--r--drivers/gpu/drm/drm_privacy_screen_x86.c106
-rw-r--r--drivers/gpu/drm/drm_probe_helper.c808
-rw-r--r--drivers/gpu/drm/drm_property.c74
-rw-r--r--drivers/gpu/drm/drm_rect.c3
-rw-r--r--drivers/gpu/drm/drm_scatter.c220
-rw-r--r--drivers/gpu/drm/drm_scdc_helper.c249
-rw-r--r--drivers/gpu/drm/drm_self_refresh_helper.c1
-rw-r--r--drivers/gpu/drm/drm_simple_kms_helper.c152
-rw-r--r--drivers/gpu/drm/drm_suballoc.c459
-rw-r--r--drivers/gpu/drm/drm_syncobj.c381
-rw-r--r--drivers/gpu/drm/drm_sysfs.c324
-rw-r--r--drivers/gpu/drm/drm_vblank.c517
-rw-r--r--drivers/gpu/drm/drm_vblank_helper.c176
-rw-r--r--drivers/gpu/drm/drm_vblank_work.c31
-rw-r--r--drivers/gpu/drm/drm_vm.c667
-rw-r--r--drivers/gpu/drm/drm_vma_manager.c79
-rw-r--r--drivers/gpu/drm/drm_writeback.c242
-rw-r--r--drivers/gpu/drm/etnaviv/cmdstream.xml.h52
-rw-r--r--drivers/gpu/drm/etnaviv/common.xml.h12
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_buffer.c20
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c1
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c10
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.c195
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.h13
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.c32
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.c146
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.h14
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c45
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c125
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c564
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.h55
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_hwdb.c233
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_iommu.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c8
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_mmu.c135
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_mmu.h6
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_perfmon.c8
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_sched.c147
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_sched.h3
-rw-r--r--drivers/gpu/drm/etnaviv/state.xml.h103
-rw-r--r--drivers/gpu/drm/etnaviv/state_blt.xml.h22
-rw-r--r--drivers/gpu/drm/etnaviv/state_hi.xml.h95
-rw-r--r--drivers/gpu/drm/exynos/Kconfig7
-rw-r--r--drivers/gpu/drm/exynos/exynos5433_drm_decon.c46
-rw-r--r--drivers/gpu/drm/exynos/exynos7_drm_decon.c213
-rw-r--r--drivers/gpu/drm/exynos/exynos_dp.c34
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_crtc.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_crtc.h3
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dma.c9
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dpi.c3
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.c73
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.h6
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dsi.c1917
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fb.c13
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fb.h1
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fbdev.c174
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fbdev.h31
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimc.c50
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimd.c126
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.c46
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.h4
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.c60
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.h5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gsc.c47
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_ipp.c38
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_mic.c49
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_plane.c26
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_rotator.c30
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_scaler.c36
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_vidi.c95
-rw-r--r--drivers/gpu/drm/exynos/exynos_hdmi.c89
-rw-r--r--drivers/gpu/drm/exynos/exynos_mixer.c33
-rw-r--r--drivers/gpu/drm/exynos/regs-decon7.h15
-rw-r--r--drivers/gpu/drm/fsl-dcu/Kconfig4
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c132
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h3
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c1
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c44
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c6
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_tcon.c2
-rw-r--r--drivers/gpu/drm/gma500/Kconfig19
-rw-r--r--drivers/gpu/drm/gma500/Makefile19
-rw-r--r--drivers/gpu/drm/gma500/accel_2d.c60
-rw-r--r--drivers/gpu/drm/gma500/backlight.c116
-rw-r--r--drivers/gpu/drm/gma500/blitter.c43
-rw-r--r--drivers/gpu/drm/gma500/blitter.h16
-rw-r--r--drivers/gpu/drm/gma500/cdv_device.c93
-rw-r--r--drivers/gpu/drm/gma500/cdv_device.h1
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_crt.c70
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_display.c44
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_dp.c95
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_hdmi.c108
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_lvds.c119
-rw-r--r--drivers/gpu/drm/gma500/fbdev.c196
-rw-r--r--drivers/gpu/drm/gma500/framebuffer.c400
-rw-r--r--drivers/gpu/drm/gma500/gem.c376
-rw-r--r--drivers/gpu/drm/gma500/gem.h39
-rw-r--r--drivers/gpu/drm/gma500/gma_device.c2
-rw-r--r--drivers/gpu/drm/gma500/gma_display.c143
-rw-r--r--drivers/gpu/drm/gma500/gma_display.h13
-rw-r--r--drivers/gpu/drm/gma500/gtt.c580
-rw-r--r--drivers/gpu/drm/gma500/gtt.h37
-rw-r--r--drivers/gpu/drm/gma500/intel_bios.c36
-rw-r--r--drivers/gpu/drm/gma500/intel_bios.h4
-rw-r--r--drivers/gpu/drm/gma500/intel_gmbus.c23
-rw-r--r--drivers/gpu/drm/gma500/intel_i2c.c36
-rw-r--r--drivers/gpu/drm/gma500/mid_bios.c17
-rw-r--r--drivers/gpu/drm/gma500/mmu.c62
-rw-r--r--drivers/gpu/drm/gma500/mmu.h2
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_crtc.c48
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_device.c89
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi.c27
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c4
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_lvds.c88
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_lvds_i2c.c38
-rw-r--r--drivers/gpu/drm/gma500/opregion.c28
-rw-r--r--drivers/gpu/drm/gma500/power.c183
-rw-r--r--drivers/gpu/drm/gma500/power.h18
-rw-r--r--drivers/gpu/drm/gma500/psb_device.c118
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.c303
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.h190
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_display.c70
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_drv.h49
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_lvds.c134
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_modes.c33
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_reg.h32
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_sdvo.c110
-rw-r--r--drivers/gpu/drm/gma500/psb_irq.c309
-rw-r--r--drivers/gpu/drm/gma500/psb_irq.h23
-rw-r--r--drivers/gpu/drm/gma500/psb_lid.c80
-rw-r--r--drivers/gpu/drm/gud/Kconfig15
-rw-r--r--drivers/gpu/drm/gud/Makefile4
-rw-r--r--drivers/gpu/drm/gud/gud_connector.c725
-rw-r--r--drivers/gpu/drm/gud/gud_drv.c699
-rw-r--r--drivers/gpu/drm/gud/gud_internal.h167
-rw-r--r--drivers/gpu/drm/gud/gud_pipe.c644
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/Kconfig7
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/Makefile4
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c168
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h69
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h21
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c307
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h64
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c392
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h132
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_serdes.c71
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c42
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_debugfs.c104
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c182
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c136
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h38
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c47
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c48
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/Kconfig6
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c79
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h4
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h4
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c73
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c36
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h2
-rw-r--r--drivers/gpu/drm/hyperv/Makefile8
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm.h55
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_drv.c263
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_modeset.c339
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_proto.c528
-rw-r--r--drivers/gpu/drm/i2c/Kconfig36
-rw-r--r--drivers/gpu/drm/i2c/Makefile10
-rw-r--r--drivers/gpu/drm/i2c/tda9950.c510
-rw-r--r--drivers/gpu/drm/i810/Makefile8
-rw-r--r--drivers/gpu/drm/i810/i810_dma.c1266
-rw-r--r--drivers/gpu/drm/i810/i810_drv.c101
-rw-r--r--drivers/gpu/drm/i810/i810_drv.h246
-rw-r--r--drivers/gpu/drm/i915/Kconfig108
-rw-r--r--drivers/gpu/drm/i915/Kconfig.debug47
-rw-r--r--drivers/gpu/drm/i915/Kconfig.profile40
-rw-r--r--drivers/gpu/drm/i915/Kconfig.unstable29
-rw-r--r--drivers/gpu/drm/i915/Makefile340
-rw-r--r--drivers/gpu/drm/i915/TODO.txt41
-rw-r--r--drivers/gpu/drm/i915/display/bxt_dpio_phy_regs.h273
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ch7017.c20
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ch7xxx.c46
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ivch.c22
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ns2501.c32
-rw-r--r--drivers/gpu/drm/i915/display/dvo_sil164.c37
-rw-r--r--drivers/gpu/drm/i915/display/dvo_tfp410.c24
-rw-r--r--drivers/gpu/drm/i915/display/g4x_dp.c1430
-rw-r--r--drivers/gpu/drm/i915/display/g4x_dp.h45
-rw-r--r--drivers/gpu/drm/i915/display/g4x_hdmi.c779
-rw-r--r--drivers/gpu/drm/i915/display/g4x_hdmi.h36
-rw-r--r--drivers/gpu/drm/i915/display/hsw_ips.c374
-rw-r--r--drivers/gpu/drm/i915/display/hsw_ips.h62
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_display_sr.c97
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_display_sr.h14
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane.c878
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane.h44
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane_regs.h112
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm.c4190
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm.h37
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm_regs.h257
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.c1252
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.h17
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi_regs.h343
-rw-r--r--drivers/gpu/drm/i915/display/intel_acpi.c129
-rw-r--r--drivers/gpu/drm/i915/display/intel_acpi.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_alpm.c600
-rw-r--r--drivers/gpu/drm/i915/display/intel_alpm.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic.c299
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic.h8
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic_plane.c583
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic_plane.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio.c1287
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio.h19
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio_regs.h170
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight.c1857
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight.h52
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight_regs.h121
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c3289
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.h266
-rw-r--r--drivers/gpu/drm/i915/display/intel_bo.c81
-rw-r--r--drivers/gpu/drm/i915/display/intel_bo.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.c1406
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.h60
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf.c290
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf_regs.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c3466
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.h111
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg.c188
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg_regs.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.c3776
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_pipeline.c99
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_pipeline.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_regs.h348
-rw-r--r--drivers/gpu/drm/i915/display/intel_colorop.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_colorop.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy.c349
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy.h8
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy_regs.h167
-rw-r--r--drivers/gpu/drm/i915/display/intel_connector.c169
-rw-r--r--drivers/gpu/drm/i915/display/intel_connector.h12
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt.c619
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt_regs.h50
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc.c567
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc.h54
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc_state_dump.c388
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc_state_dump.h18
-rw-r--r--drivers/gpu/drm/i915/display/intel_csr.c819
-rw-r--r--drivers/gpu/drm/i915/display/intel_csr.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor.c658
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor.h9
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor_regs.h112
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy.c3642
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy.h70
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h436
-rw-r--r--drivers/gpu/drm/i915/display/intel_dbuf_bw.c295
-rw-r--r--drivers/gpu/drm/i915/display/intel_dbuf_bw.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c4324
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.h61
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c2718
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h91
-rw-r--r--drivers/gpu/drm/i915/display/intel_de.h224
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c14437
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.h542
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_conversion.c21
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_conversion.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_core.h627
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs.c2369
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs.h18
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs_params.c177
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs_params.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.c1985
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.h326
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_driver.c797
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_driver.h42
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_irq.c2486
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_irq.h94
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_jiffies.h43
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_limits.h150
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_params.c242
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_params.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c5804
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.h390
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_map.c1933
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_map.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_well.c2069
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_well.h181
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reg_defs.h49
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_regs.h2934
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reset.c127
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reset.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rpm.c62
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rpm.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rps.c108
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rps.h46
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_snapshot.c79
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_snapshot.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_trace.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_trace.h858
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h1315
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_utils.c32
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_utils.h31
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_wa.c79
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_wa.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy.c117
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy_regs.h207
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc.c1757
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc.h51
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_regs.h587
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_wl.c506
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_wl.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c8129
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.h197
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux.c467
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux.h11
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c723
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_regs.h106
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.c396
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.c1844
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.h32
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c2104
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_test.c764
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_test.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_tunnel.c814
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_tunnel.h133
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpio_phy.c767
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpio_phy.h155
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll.c1777
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll.h57
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.c3912
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.h281
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.c326
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.h26
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt_common.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt_common.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.c421
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.c1100
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.h90
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_buffer.c82
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_buffer.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_regs.h100
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi.c80
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi.h57
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c60
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt.c779
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h197
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo.c494
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo.h10
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo_dev.h46
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo_regs.h54
-rw-r--r--drivers/gpu/drm/i915/display/intel_encoder.c123
-rw-r--r--drivers/gpu/drm/i915/display/intel_encoder.h26
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb.c2397
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb.h127
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_bo.c101
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_bo.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.c356
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c2654
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.h47
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc_regs.h129
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c666
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.h49
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev_fb.c106
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev_fb.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi.c856
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi_regs.h151
-rw-r--r--drivers/gpu/drm/i915/display/intel_fifo_underrun.c270
-rw-r--r--drivers/gpu/drm/i915/display/intel_fifo_underrun.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_fixed.h (renamed from drivers/gpu/drm/i915/i915_fixed.h)0
-rw-r--r--drivers/gpu/drm/i915/display/intel_flipq.c472
-rw-r--r--drivers/gpu/drm/i915/display/intel_flipq.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.c217
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.h77
-rw-r--r--drivers/gpu/drm/i915/display/intel_global_state.c214
-rw-r--r--drivers/gpu/drm/i915/display/intel_global_state.h49
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus.c580
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus.h17
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus_regs.h81
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.c1564
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.h24
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc.c239
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc.h22
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.c672
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_regs.h272
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_shim.h137
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c2246
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.c941
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.h30
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug_irq.c1495
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug_irq.h35
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti.c43
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti.h18
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti_regs.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_link_bw.c497
-rw-r--r--drivers/gpu/drm/i915/display/intel_link_bw.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_load_detect.c225
-rw-r--r--drivers/gpu/drm/i915/display/intel_load_detect.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_lpe_audio.c143
-rw-r--r--drivers/gpu/drm/i915/display/intel_lpe_audio.h32
-rw-r--r--drivers/gpu/drm/i915/display/intel_lspcon.c247
-rw-r--r--drivers/gpu/drm/i915/display/intel_lspcon.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy.c2327
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy.h47
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy_regs.h90
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds.c572
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds.h31
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds_regs.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_mg_phy_regs.h282
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_lock.c50
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_lock.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_setup.c1012
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_setup.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_verify.c258
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_verify.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_opregion.c584
-rw-r--r--drivers/gpu/drm/i915/display/intel_opregion.h109
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.c310
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.h66
-rw-r--r--drivers/gpu/drm/i915/display/intel_panel.c2345
-rw-r--r--drivers/gpu/drm/i915/display/intel_panel.h86
-rw-r--r--drivers/gpu/drm/i915/display/intel_panic.c27
-rw-r--r--drivers/gpu/drm/i915/display/intel_panic.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch.c340
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch.h56
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_display.c644
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_display.h98
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_refclk.c684
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_refclk.h45
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit.c731
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit_regs.h79
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc.c199
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc_regs.h152
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.c1766
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.h91
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane_initial.c440
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane_initial.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_pmdemand.c675
-rw-r--r--drivers/gpu/drm/i915/display/intel_pmdemand.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps.c1394
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps_regs.h77
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c4083
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.h78
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr_regs.h342
-rw-r--r--drivers/gpu/drm/i915/display/intel_qp_tables.c475
-rw-r--r--drivers/gpu/drm/i915/display/intel_qp_tables.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_quirks.c171
-rw-r--r--drivers/gpu/drm/i915/display/intel_quirks.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi.c92
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi.h27
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi_regs.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.c1289
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo_regs.h4
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c366
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.h17
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy.c2028
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy.h38
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy_regs.h75
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.c905
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_regs.h388
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_uapi.c128
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_uapi.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.c2029
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.h99
-rw-r--r--drivers/gpu/drm/i915/display/intel_tdf.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv.c404
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv.h10
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv_regs.h490
-rw-r--r--drivers/gpu/drm/i915/display/intel_vblank.c781
-rw-r--r--drivers/gpu/drm/i915/display/intel_vblank.h53
-rw-r--r--drivers/gpu/drm/i915/display/intel_vbt_defs.h1189
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc.c1670
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc.h27
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc_regs.h354
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga.c148
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga.h12
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga_regs.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.c861
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.h35
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr_regs.h126
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm.c404
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm_types.h76
-rw-r--r--drivers/gpu/drm/i915/display/skl_prefill.c157
-rw-r--r--drivers/gpu/drm/i915/display/skl_prefill.h46
-rw-r--r--drivers/gpu/drm/i915/display/skl_scaler.c1032
-rw-r--r--drivers/gpu/drm/i915/display/skl_scaler.h52
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c2223
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.h21
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane_regs.h585
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark.c4108
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark.h86
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark_regs.h87
-rw-r--r--drivers/gpu/drm/i915/display/vlv_clock.c88
-rw-r--r--drivers/gpu/drm/i915/display/vlv_clock.h38
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dpio_phy_regs.h309
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.c1131
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.h31
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll.c374
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll.h52
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll_regs.h109
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_regs.h475
-rw-r--r--drivers/gpu/drm/i915/display/vlv_sideband.c50
-rw-r--r--drivers/gpu/drm/i915/display/vlv_sideband.h156
-rw-r--r--drivers/gpu/drm/i915/dma_resv_utils.c17
-rw-r--r--drivers/gpu/drm/i915/dma_resv_utils.h13
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_busy.c98
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.c52
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_client_blt.c355
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_client_blt.h21
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c2765
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.h30
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context_types.h263
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_create.c454
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_create.h17
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c220
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h18
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c208
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.h15
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c2240
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_fence.c95
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.c83
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.h23
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ioctls.h5
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_lmem.c117
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_lmem.h19
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c447
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.h6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c745
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h561
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_blt.c455
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_blt.h39
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.c103
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h84
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h541
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c351
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_phys.c136
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.c142
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_region.c277
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_region.h48
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c533
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c245
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c686
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.h41
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_throttle.c7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.c70
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.h20
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c1381
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.h107
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c757
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h42
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c233
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h26
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c961
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_wait.c190
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gemfs.c53
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gemfs.h6
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c12
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c666
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c432
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c48
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c482
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c298
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c186
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c528
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c701
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c14
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c597
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c11
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c32
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h14
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_context.c100
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_context.h7
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c10
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.c36
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.h14
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.c47
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.h38
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.c630
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.h14
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.c39
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.h6
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_engine_cs.c9
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c213
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.h6
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_renderstate.c20
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.c9
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderstate.c20
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c404
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h31
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c545
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.h10
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_renderstate.c20
-rw-r--r--drivers/gpu/drm/i915/gt/gen9_renderstate.c20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c101
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.h16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c222
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h185
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_param.c63
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_param.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_sseu.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h214
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h178
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c1479
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c153
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c124
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h269
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_stats.h33
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h317
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c82
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c1116
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c1361
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c112
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c140
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h125
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.c321
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.h43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c602
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h142
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c69
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c224
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.c122
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.h53
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_defines.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c36
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c329
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.h26
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c870
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.h71
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c109
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h65
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c601
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_print.h57
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h1641
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c68
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.c116
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.h31
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c930
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h179
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c415
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h272
-rw-r--r--drivers/gpu/drm/i915/gt/intel_hwconfig.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.c41
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc_types.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c670
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h81
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c1164
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.h65
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate_types.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c252
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.h23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c90
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c358
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.h16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6_types.h20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c290
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c41
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c744
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset_types.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c52
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c557
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_types.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c1189
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps_types.h22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sa_media.c48
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sa_media.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c601
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.h128
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c89
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c446
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h24
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline_types.h20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.c173
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.h29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_wopcm.c324
-rw-r--r--drivers/gpu/drm/i915/gt/intel_wopcm.h (renamed from drivers/gpu/drm/i915/intel_wopcm.h)3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c2187
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds_types.h16
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c158
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.h21
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c26
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine.c5
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine.h3
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c88
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c161
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_pm.c56
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c669
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c62
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c509
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_llc.c13
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_llc.h3
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c354
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c1039
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c108
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c96
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.h3
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c28
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring_submission.c16
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c102
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_slpc.c557
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c210
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c405
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c306
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/README6
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm2
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm2
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c45
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.h3
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c103
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h217
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h275
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h170
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h49
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h60
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h113
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h264
-rw-r--r--drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h226
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h135
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c524
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c429
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c358
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h85
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c39
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c231
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h93
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c611
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h407
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c976
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c1654
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h35
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c1355
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h67
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c114
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c259
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h627
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c167
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c429
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h49
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c67
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_print.h51
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c81
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h23
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c907
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h51
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h52
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c5834
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h25
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c629
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h63
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c260
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_print.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c374
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c1222
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h99
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c419
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c162
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c184
-rw-r--r--drivers/gpu/drm/i915/gvt/Makefile30
-rw-r--r--drivers/gpu/drm/i915/gvt/aperture_gm.c38
-rw-r--r--drivers/gpu/drm/i915/gvt/cfg_space.c98
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c97
-rw-r--r--drivers/gpu/drm/i915/gvt/debugfs.c57
-rw-r--r--drivers/gpu/drm/i915/gvt/display.c209
-rw-r--r--drivers/gpu/drm/i915/gvt/display.h69
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c85
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.h2
-rw-r--r--drivers/gpu/drm/i915/gvt/edid.c59
-rw-r--r--drivers/gpu/drm/i915/gvt/edid.h12
-rw-r--r--drivers/gpu/drm/i915/gvt/execlist.c12
-rw-r--r--drivers/gpu/drm/i915/gvt/fb_decoder.c211
-rw-r--r--drivers/gpu/drm/i915/gvt/fb_decoder.h15
-rw-r--r--drivers/gpu/drm/i915/gvt/firmware.c60
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c319
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.h8
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.c458
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.h222
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c1620
-rw-r--r--drivers/gpu/drm/i915/gvt/hypercall.h82
-rw-r--r--drivers/gpu/drm/i915/gvt/interrupt.c110
-rw-r--r--drivers/gpu/drm/i915/gvt/interrupt.h42
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c1598
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio.c18
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio.h6
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c53
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.h12
-rw-r--r--drivers/gpu/drm/i915/gvt/mpt.h400
-rw-r--r--drivers/gpu/drm/i915/gvt/opregion.c159
-rw-r--r--drivers/gpu/drm/i915/gvt/page_track.c22
-rw-r--r--drivers/gpu/drm/i915/gvt/reg.h13
-rw-r--r--drivers/gpu/drm/i915/gvt/sched_policy.c10
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c82
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h4
-rw-r--r--drivers/gpu/drm/i915/gvt/trace.h4
-rw-r--r--drivers/gpu/drm/i915/gvt/vgpu.c257
-rw-r--r--drivers/gpu/drm/i915/i915_active.c246
-rw-r--r--drivers/gpu/drm/i915/i915_active.h46
-rw-r--r--drivers/gpu/drm/i915/i915_active_types.h7
-rw-r--r--drivers/gpu/drm/i915/i915_buddy.c432
-rw-r--r--drivers/gpu/drm/i915/i915_buddy.h128
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.c219
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.h26
-rw-r--r--drivers/gpu/drm/i915/i915_config.c7
-rw-r--r--drivers/gpu/drm/i915/i915_config.h23
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c640
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs_params.c73
-rw-r--r--drivers/gpu/drm/i915/i915_deps.c237
-rw-r--r--drivers/gpu/drm/i915/i915_deps.h45
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c1847
-rw-r--r--drivers/gpu/drm/i915/i915_driver.h33
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.c220
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.h92
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c1765
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1957
-rw-r--r--drivers/gpu/drm/i915/i915_file_private.h111
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c635
-rw-r--r--drivers/gpu/drm/i915/i915_gem.h101
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c175
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.h30
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c32
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.h7
-rw-r--r--drivers/gpu/drm/i915/i915_gem_ww.c63
-rw-r--r--drivers/gpu/drm/i915/i915_gem_ww.h41
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c57
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.h15
-rw-r--r--drivers/gpu/drm/i915/i915_globals.c161
-rw-r--r--drivers/gpu/drm/i915/i915_globals.h38
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c1323
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h135
-rw-r--r--drivers/gpu/drm/i915/i915_gtt_view_types.h59
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.c994
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.h27
-rw-r--r--drivers/gpu/drm/i915/i915_ioc32.c1
-rw-r--r--drivers/gpu/drm/i915/i915_ioctl.c94
-rw-r--r--drivers/gpu/drm/i915/i915_ioctl.h14
-rw-r--r--drivers/gpu/drm/i915/i915_iosf_mbi.h36
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c4025
-rw-r--r--drivers/gpu/drm/i915/i915_irq.h133
-rw-r--r--drivers/gpu/drm/i915/i915_jiffies.h16
-rw-r--r--drivers/gpu/drm/i915/i915_list_util.h23
-rw-r--r--drivers/gpu/drm/i915/i915_memcpy.c6
-rw-r--r--drivers/gpu/drm/i915/i915_memcpy.h2
-rw-r--r--drivers/gpu/drm/i915/i915_mitigations.c1
-rw-r--r--drivers/gpu/drm/i915/i915_mm.c44
-rw-r--r--drivers/gpu/drm/i915/i915_mm.h36
-rw-r--r--drivers/gpu/drm/i915/i915_mmio_range.c18
-rw-r--r--drivers/gpu/drm/i915/i915_mmio_range.h19
-rw-r--r--drivers/gpu/drm/i915/i915_module.c129
-rw-r--r--drivers/gpu/drm/i915/i915_params.c215
-rw-r--r--drivers/gpu/drm/i915/i915_params.h36
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c863
-rw-r--r--drivers/gpu/drm/i915/i915_pci.h18
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c1757
-rw-r--r--drivers/gpu/drm/i915/i915_perf.h8
-rw-r--r--drivers/gpu/drm/i915/i915_perf_oa_regs.h215
-rw-r--r--drivers/gpu/drm/i915/i915_perf_types.h130
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c629
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h45
-rw-r--r--drivers/gpu/drm/i915/i915_priolist_types.h11
-rw-r--r--drivers/gpu/drm/i915/i915_ptr_util.h66
-rw-r--r--drivers/gpu/drm/i915/i915_query.c230
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h11983
-rw-r--r--drivers/gpu/drm/i915/i915_reg_defs.h229
-rw-r--r--drivers/gpu/drm/i915/i915_request.c738
-rw-r--r--drivers/gpu/drm/i915/i915_request.h188
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.c203
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.h134
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c270
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.h71
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler_types.h111
-rw-r--r--drivers/gpu/drm/i915/i915_selftest.h4
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.c121
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.h14
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.c105
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.h30
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.c7
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.h2
-rw-r--r--drivers/gpu/drm/i915/i915_switcheroo.c18
-rw-r--r--drivers/gpu/drm/i915/i915_syncmap.c19
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.c464
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.h3
-rw-r--r--drivers/gpu/drm/i915/i915_tasklet.h43
-rw-r--r--drivers/gpu/drm/i915/i915_timer_util.c36
-rw-r--r--drivers/gpu/drm/i915/i915_timer_util.h23
-rw-r--r--drivers/gpu/drm/i915/i915_trace.h719
-rw-r--r--drivers/gpu/drm/i915/i915_ttm_buddy_manager.c440
-rw-r--r--drivers/gpu/drm/i915/i915_ttm_buddy_manager.h72
-rw-r--r--drivers/gpu/drm/i915/i915_user_extensions.c2
-rw-r--r--drivers/gpu/drm/i915/i915_utils.c92
-rw-r--r--drivers/gpu/drm/i915/i915_utils.h377
-rw-r--r--drivers/gpu/drm/i915/i915_vgpu.c6
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c1429
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h195
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.c425
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.h261
-rw-r--r--drivers/gpu/drm/i915/i915_vma_types.h102
-rw-r--r--drivers/gpu/drm/i915/i915_wait_util.h119
-rw-r--r--drivers/gpu/drm/i915/intel_clock_gating.c801
-rw-r--r--drivers/gpu/drm/i915/intel_clock_gating.h14
-rw-r--r--drivers/gpu/drm/i915/intel_cpu_info.c44
-rw-r--r--drivers/gpu/drm/i915/intel_cpu_info.h13
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.c457
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h187
-rw-r--r--drivers/gpu/drm/i915/intel_dram.c558
-rw-r--r--drivers/gpu/drm/i915/intel_dram.h14
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.c259
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.h32
-rw-r--r--drivers/gpu/drm/i915/intel_gvt_mmio_table.c1320
-rw-r--r--drivers/gpu/drm/i915/intel_mchbar_regs.h258
-rw-r--r--drivers/gpu/drm/i915/intel_memory_region.c422
-rw-r--r--drivers/gpu/drm/i915/intel_memory_region.h115
-rw-r--r--drivers/gpu/drm/i915/intel_pch.c265
-rw-r--r--drivers/gpu/drm/i915/intel_pch.h86
-rw-r--r--drivers/gpu/drm/i915/intel_pci_config.h110
-rw-r--r--drivers/gpu/drm/i915/intel_pcode.c301
-rw-r--r--drivers/gpu/drm/i915/intel_pcode.h39
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c7848
-rw-r--r--drivers/gpu/drm/i915/intel_pm.h101
-rw-r--r--drivers/gpu/drm/i915/intel_region_ttm.c266
-rw-r--r--drivers/gpu/drm/i915/intel_region_ttm.h44
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c401
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.h73
-rw-r--r--drivers/gpu/drm/i915/intel_sideband.c572
-rw-r--r--drivers/gpu/drm/i915/intel_sideband.h143
-rw-r--r--drivers/gpu/drm/i915/intel_step.c272
-rw-r--r--drivers/gpu/drm/i915/intel_step.h80
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c1441
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h157
-rw-r--r--drivers/gpu/drm/i915/intel_uncore_trace.c7
-rw-r--r--drivers/gpu/drm/i915/intel_uncore_trace.h49
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.c118
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.h123
-rw-r--r--drivers/gpu/drm/i915/intel_wopcm.c277
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.c544
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.h38
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c143
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h15
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_42.h43
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h61
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h42
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c91
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h21
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c448
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h45
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.c73
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.h13
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_irq.c109
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_irq.h40
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_pm.c80
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_pm.h38
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_regs.h27
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_session.c192
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_session.h25
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.c413
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.h22
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_types.h130
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_active.c17
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_buddy.c741
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem.c55
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_evict.c60
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c840
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_live_selftests.h9
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_mock_selftests.h2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf.c31
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf_selftests.h2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_random.h2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c800
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_selftest.c95
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_sw_fence.c18
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_syncmap.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_vma.c339
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_flush_test.c38
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_live_test.c50
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_live_test.h3
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_mmap.c25
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_mmap.h12
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_reset.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_spinner.c187
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_spinner.h5
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_memory_region.c521
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c101
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h35
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_uncore.c61
-rw-r--r--drivers/gpu/drm/i915/selftests/lib_sw_fence.c14
-rw-r--r--drivers/gpu/drm/i915/selftests/librapl.c14
-rw-r--r--drivers/gpu/drm/i915/selftests/librapl.h4
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c126
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.c40
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.h3
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_region.c88
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_region.h3
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_request.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_uncore.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/scatterlist.c6
-rw-r--r--drivers/gpu/drm/i915/soc/intel_dram.c823
-rw-r--r--drivers/gpu/drm/i915/soc/intel_dram.h45
-rw-r--r--drivers/gpu/drm/i915/soc/intel_gmch.c188
-rw-r--r--drivers/gpu/drm/i915/soc/intel_gmch.h20
-rw-r--r--drivers/gpu/drm/i915/soc/intel_rom.c161
-rw-r--r--drivers/gpu/drm/i915/soc/intel_rom.h25
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb.c231
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb.h37
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb_reg.h180
-rw-r--r--drivers/gpu/drm/i915/vlv_suspend.c39
-rw-r--r--drivers/gpu/drm/imagination/Kconfig20
-rw-r--r--drivers/gpu/drm/imagination/Makefile35
-rw-r--r--drivers/gpu/drm/imagination/pvr_ccb.c646
-rw-r--r--drivers/gpu/drm/imagination/pvr_ccb.h71
-rw-r--r--drivers/gpu/drm/imagination/pvr_cccb.c267
-rw-r--r--drivers/gpu/drm/imagination/pvr_cccb.h110
-rw-r--r--drivers/gpu/drm/imagination/pvr_context.c485
-rw-r--r--drivers/gpu/drm/imagination/pvr_context.h226
-rw-r--r--drivers/gpu/drm/imagination/pvr_debugfs.c52
-rw-r--r--drivers/gpu/drm/imagination/pvr_debugfs.h29
-rw-r--r--drivers/gpu/drm/imagination/pvr_device.c773
-rw-r--r--drivers/gpu/drm/imagination/pvr_device.h794
-rw-r--r--drivers/gpu/drm/imagination/pvr_device_info.c255
-rw-r--r--drivers/gpu/drm/imagination/pvr_device_info.h186
-rw-r--r--drivers/gpu/drm/imagination/pvr_drv.c1535
-rw-r--r--drivers/gpu/drm/imagination/pvr_drv.h128
-rw-r--r--drivers/gpu/drm/imagination/pvr_free_list.c624
-rw-r--r--drivers/gpu/drm/imagination/pvr_free_list.h195
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw.c1515
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw.h496
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_info.h135
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_meta.c561
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_meta.h14
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_mips.c199
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_mips.h49
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_riscv.c165
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_startstop.c323
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_startstop.h13
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_trace.c466
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_trace.h76
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_util.c66
-rw-r--r--drivers/gpu/drm/imagination/pvr_gem.c416
-rw-r--r--drivers/gpu/drm/imagination/pvr_gem.h172
-rw-r--r--drivers/gpu/drm/imagination/pvr_hwrt.c546
-rw-r--r--drivers/gpu/drm/imagination/pvr_hwrt.h166
-rw-r--r--drivers/gpu/drm/imagination/pvr_job.c780
-rw-r--r--drivers/gpu/drm/imagination/pvr_job.h161
-rw-r--r--drivers/gpu/drm/imagination/pvr_mmu.c2646
-rw-r--r--drivers/gpu/drm/imagination/pvr_mmu.h108
-rw-r--r--drivers/gpu/drm/imagination/pvr_params.c147
-rw-r--r--drivers/gpu/drm/imagination/pvr_params.h72
-rw-r--r--drivers/gpu/drm/imagination/pvr_power.c703
-rw-r--r--drivers/gpu/drm/imagination/pvr_power.h59
-rw-r--r--drivers/gpu/drm/imagination/pvr_queue.c1453
-rw-r--r--drivers/gpu/drm/imagination/pvr_queue.h173
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h6308
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h159
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_defs.h179
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif.h2188
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h493
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h373
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h133
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h60
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h113
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h28
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h1648
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h258
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h108
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h78
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_heap_config.h113
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_meta.h356
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mips.h335
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mips_check.h58
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h136
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_riscv.h41
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream.c281
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream.h75
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream_defs.c351
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream_defs.h16
-rw-r--r--drivers/gpu/drm/imagination/pvr_sync.c289
-rw-r--r--drivers/gpu/drm/imagination/pvr_sync.h84
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm.c1194
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm.h70
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm_mips.c237
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm_mips.h22
-rw-r--r--drivers/gpu/drm/imx/Kconfig44
-rw-r--r--drivers/gpu/drm/imx/Makefile12
-rw-r--r--drivers/gpu/drm/imx/dc/Kconfig13
-rw-r--r--drivers/gpu/drm/imx/dc/Makefile7
-rw-r--r--drivers/gpu/drm/imx/dc/dc-cf.c172
-rw-r--r--drivers/gpu/drm/imx/dc/dc-crtc.c555
-rw-r--r--drivers/gpu/drm/imx/dc/dc-de.c177
-rw-r--r--drivers/gpu/drm/imx/dc/dc-de.h59
-rw-r--r--drivers/gpu/drm/imx/dc/dc-drv.c293
-rw-r--r--drivers/gpu/drm/imx/dc/dc-drv.h102
-rw-r--r--drivers/gpu/drm/imx/dc/dc-ed.c288
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fg.c376
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fl.c185
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fu.c258
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fu.h129
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fw.c222
-rw-r--r--drivers/gpu/drm/imx/dc/dc-ic.c282
-rw-r--r--drivers/gpu/drm/imx/dc/dc-kms.c143
-rw-r--r--drivers/gpu/drm/imx/dc/dc-kms.h131
-rw-r--r--drivers/gpu/drm/imx/dc/dc-lb.c325
-rw-r--r--drivers/gpu/drm/imx/dc/dc-pe.c158
-rw-r--r--drivers/gpu/drm/imx/dc/dc-pe.h101
-rw-r--r--drivers/gpu/drm/imx/dc/dc-plane.c224
-rw-r--r--drivers/gpu/drm/imx/dc/dc-tc.c141
-rw-r--r--drivers/gpu/drm/imx/dcss/Kconfig8
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-blkctl.c13
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-crtc.c6
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-ctxld.c14
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dev.c48
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dev.h8
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dpr.c21
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-drv.c42
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dtg.c30
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-kms.c35
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-kms.h1
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-plane.c84
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-scaler.c25
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-ss.c12
-rw-r--r--drivers/gpu/drm/imx/imx-drm.h45
-rw-r--r--drivers/gpu/drm/imx/ipuv3/Kconfig49
-rw-r--r--drivers/gpu/drm/imx/ipuv3/Makefile11
-rw-r--r--drivers/gpu/drm/imx/ipuv3/dw_hdmi-imx.c (renamed from drivers/gpu/drm/imx/dw_hdmi-imx.c)13
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-drm-core.c (renamed from drivers/gpu/drm/imx/imx-drm-core.c)86
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-drm.h31
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-ldb.c (renamed from drivers/gpu/drm/imx/imx-ldb.c)236
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-tve.c (renamed from drivers/gpu/drm/imx/imx-tve.c)54
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c (renamed from drivers/gpu/drm/imx/ipuv3-crtc.c)25
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c (renamed from drivers/gpu/drm/imx/ipuv3-plane.c)262
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-plane.h (renamed from drivers/gpu/drm/imx/ipuv3-plane.h)0
-rw-r--r--drivers/gpu/drm/imx/ipuv3/parallel-display.c288
-rw-r--r--drivers/gpu/drm/imx/lcdc/Kconfig10
-rw-r--r--drivers/gpu/drm/imx/lcdc/Makefile1
-rw-r--r--drivers/gpu/drm/imx/lcdc/imx-lcdc.c537
-rw-r--r--drivers/gpu/drm/imx/parallel-display.c390
-rw-r--r--drivers/gpu/drm/ingenic/Kconfig18
-rw-r--r--drivers/gpu/drm/ingenic/Makefile1
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-drm-drv.c731
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-drm.h42
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-dw-hdmi.c103
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-ipu.c235
-rw-r--r--drivers/gpu/drm/kmb/Kconfig6
-rw-r--r--drivers/gpu/drm/kmb/kmb_crtc.c42
-rw-r--r--drivers/gpu/drm/kmb/kmb_drv.c77
-rw-r--r--drivers/gpu/drm/kmb/kmb_drv.h17
-rw-r--r--drivers/gpu/drm/kmb/kmb_dsi.c41
-rw-r--r--drivers/gpu/drm/kmb/kmb_dsi.h2
-rw-r--r--drivers/gpu/drm/kmb/kmb_plane.c198
-rw-r--r--drivers/gpu/drm/kmb/kmb_plane.h11
-rw-r--r--drivers/gpu/drm/kmb/kmb_regs.h3
-rw-r--r--drivers/gpu/drm/lib/drm_random.c4
-rw-r--r--drivers/gpu/drm/lib/drm_random.h4
-rw-r--r--drivers/gpu/drm/lima/lima_bcast.c12
-rw-r--r--drivers/gpu/drm/lima/lima_bcast.h3
-rw-r--r--drivers/gpu/drm/lima/lima_ctx.c3
-rw-r--r--drivers/gpu/drm/lima/lima_ctx.h1
-rw-r--r--drivers/gpu/drm/lima/lima_devfreq.c63
-rw-r--r--drivers/gpu/drm/lima/lima_devfreq.h5
-rw-r--r--drivers/gpu/drm/lima/lima_device.c4
-rw-r--r--drivers/gpu/drm/lima/lima_drv.c42
-rw-r--r--drivers/gpu/drm/lima/lima_drv.h5
-rw-r--r--drivers/gpu/drm/lima/lima_gem.c95
-rw-r--r--drivers/gpu/drm/lima/lima_gp.c49
-rw-r--r--drivers/gpu/drm/lima/lima_l2_cache.c6
-rw-r--r--drivers/gpu/drm/lima/lima_mmu.c23
-rw-r--r--drivers/gpu/drm/lima/lima_pmu.c3
-rw-r--r--drivers/gpu/drm/lima/lima_pp.c59
-rw-r--r--drivers/gpu/drm/lima/lima_sched.c115
-rw-r--r--drivers/gpu/drm/lima/lima_sched.h13
-rw-r--r--drivers/gpu/drm/lima/lima_trace.h8
-rw-r--r--drivers/gpu/drm/logicvc/Kconfig12
-rw-r--r--drivers/gpu/drm/logicvc/Makefile9
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_crtc.c280
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_crtc.h21
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_drm.c505
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_drm.h67
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_interface.c213
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_interface.h28
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_layer.c630
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_layer.h64
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_mode.c76
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_mode.h15
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_of.c185
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_of.h46
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_regs.h80
-rw-r--r--drivers/gpu/drm/loongson/Kconfig20
-rw-r--r--drivers/gpu/drm/loongson/Makefile22
-rw-r--r--drivers/gpu/drm/loongson/loongson_device.c102
-rw-r--r--drivers/gpu/drm/loongson/loongson_module.c33
-rw-r--r--drivers/gpu/drm/loongson/loongson_module.h12
-rw-r--r--drivers/gpu/drm/loongson/lsdc_benchmark.c134
-rw-r--r--drivers/gpu/drm/loongson/lsdc_benchmark.h13
-rw-r--r--drivers/gpu/drm/loongson/lsdc_crtc.c1026
-rw-r--r--drivers/gpu/drm/loongson/lsdc_debugfs.c111
-rw-r--r--drivers/gpu/drm/loongson/lsdc_drv.c463
-rw-r--r--drivers/gpu/drm/loongson/lsdc_drv.h388
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gem.c290
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gem.h37
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gfxpll.c199
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gfxpll.h52
-rw-r--r--drivers/gpu/drm/loongson/lsdc_i2c.c179
-rw-r--r--drivers/gpu/drm/loongson/lsdc_i2c.h29
-rw-r--r--drivers/gpu/drm/loongson/lsdc_irq.c75
-rw-r--r--drivers/gpu/drm/loongson/lsdc_irq.h16
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output.h21
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output_7a1000.c178
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output_7a2000.c552
-rw-r--r--drivers/gpu/drm/loongson/lsdc_pixpll.c484
-rw-r--r--drivers/gpu/drm/loongson/lsdc_pixpll.h86
-rw-r--r--drivers/gpu/drm/loongson/lsdc_plane.c794
-rw-r--r--drivers/gpu/drm/loongson/lsdc_probe.c56
-rw-r--r--drivers/gpu/drm/loongson/lsdc_probe.h12
-rw-r--r--drivers/gpu/drm/loongson/lsdc_regs.h406
-rw-r--r--drivers/gpu/drm/loongson/lsdc_ttm.c589
-rw-r--r--drivers/gpu/drm/loongson/lsdc_ttm.h99
-rw-r--r--drivers/gpu/drm/mcde/Kconfig5
-rw-r--r--drivers/gpu/drm/mcde/mcde_clk_div.c14
-rw-r--r--drivers/gpu/drm/mcde/mcde_display.c19
-rw-r--r--drivers/gpu/drm/mcde/mcde_drv.c48
-rw-r--r--drivers/gpu/drm/mcde/mcde_dsi.c36
-rw-r--r--drivers/gpu/drm/mediatek/Kconfig45
-rw-r--r--drivers/gpu/drm/mediatek/Makefile29
-rw-r--r--drivers/gpu/drm/mediatek/mtk_cec.c50
-rw-r--r--drivers/gpu/drm/mediatek/mtk_crtc.c1186
-rw-r--r--drivers/gpu/drm/mediatek/mtk_crtc.h29
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ddp_comp.c710
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ddp_comp.h365
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_aal.c226
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ccorr.c71
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_color.c40
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_drv.h103
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_gamma.c234
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_merge.c376
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl.c421
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c679
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_rdma.c140
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dp.c2951
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dp_reg.h362
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dpi.c937
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dpi_regs.h27
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.c853
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.h26
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c538
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h204
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.c1032
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.h52
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_gem.c286
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_gem.h51
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_plane.c252
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_plane.h44
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dsi.c791
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ethdr.c396
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ethdr.h26
-rw-r--r--drivers/gpu/drm/mediatek/mtk_gem.c289
-rw-r--r--drivers/gpu/drm/mediatek/mtk_gem.h48
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi.c1014
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi.h14
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_common.c456
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_common.h198
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c35
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c396
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_regs_v2.h263
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_v2.c1521
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mdp_rdma.c347
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mdp_rdma.h20
-rw-r--r--drivers/gpu/drm/mediatek/mtk_padding.c154
-rw-r--r--drivers/gpu/drm/mediatek/mtk_plane.c367
-rw-r--r--drivers/gpu/drm/mediatek/mtk_plane.h54
-rw-r--r--drivers/gpu/drm/meson/Kconfig17
-rw-r--r--drivers/gpu/drm/meson/Makefile4
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c237
-rw-r--r--drivers/gpu/drm/meson/meson_drv.h11
-rw-r--r--drivers/gpu/drm/meson/meson_dw_hdmi.c459
-rw-r--r--drivers/gpu/drm/meson/meson_dw_mipi_dsi.c358
-rw-r--r--drivers/gpu/drm/meson/meson_dw_mipi_dsi.h160
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_cvbs.c298
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_cvbs.h30
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_dsi.c171
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_dsi.h13
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_hdmi.c496
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_hdmi.h13
-rw-r--r--drivers/gpu/drm/meson/meson_osd_afbcd.c41
-rw-r--r--drivers/gpu/drm/meson/meson_osd_afbcd.h1
-rw-r--r--drivers/gpu/drm/meson/meson_overlay.c48
-rw-r--r--drivers/gpu/drm/meson/meson_plane.c85
-rw-r--r--drivers/gpu/drm/meson/meson_registers.h30
-rw-r--r--drivers/gpu/drm/meson/meson_vclk.c226
-rw-r--r--drivers/gpu/drm/meson/meson_vclk.h13
-rw-r--r--drivers/gpu/drm/meson/meson_venc.c250
-rw-r--r--drivers/gpu/drm/meson/meson_venc.h6
-rw-r--r--drivers/gpu/drm/meson/meson_venc_cvbs.c293
-rw-r--r--drivers/gpu/drm/meson/meson_venc_cvbs.h29
-rw-r--r--drivers/gpu/drm/meson/meson_viu.c36
-rw-r--r--drivers/gpu/drm/meson/meson_vpp.c2
-rw-r--r--drivers/gpu/drm/meson/meson_vpp.h2
-rw-r--r--drivers/gpu/drm/mga/Makefile11
-rw-r--r--drivers/gpu/drm/mga/mga_dma.c1167
-rw-r--r--drivers/gpu/drm/mga/mga_drv.c104
-rw-r--r--drivers/gpu/drm/mga/mga_drv.h687
-rw-r--r--drivers/gpu/drm/mga/mga_ioc32.c196
-rw-r--r--drivers/gpu/drm/mga/mga_irq.c169
-rw-r--r--drivers/gpu/drm/mga/mga_state.c1098
-rw-r--r--drivers/gpu/drm/mga/mga_warp.c167
-rw-r--r--drivers/gpu/drm/mgag200/Kconfig15
-rw-r--r--drivers/gpu/drm/mgag200/Makefile17
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_bmc.c99
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_ddc.c178
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_ddc.h11
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.c435
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.h348
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200.c408
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh.c280
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh3.c185
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh5.c205
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200er.c313
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200ev.c318
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200ew3.c203
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200se.c518
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200wb.c327
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_i2c.c159
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_mm.c137
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_mode.c1780
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_reg.h22
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_vga.c73
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_vga_bmc.c157
-rw-r--r--drivers/gpu/drm/msm/.gitignore1
-rw-r--r--drivers/gpu/drm/msm/Kconfig134
-rw-r--r--drivers/gpu/drm/msm/Makefile198
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx.xml.h3188
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_catalog.c48
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpu.c112
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpu.h7
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpummu.c122
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx.xml.h3247
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_catalog.c88
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.c114
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.h2
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx.xml.h4275
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_catalog.c48
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.c100
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.h2
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx.xml.h5438
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_catalog.c153
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_debugfs.c20
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c368
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.h3
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_power.c10
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_preempt.c54
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx.xml.h7689
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_catalog.c1961
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.c1306
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.h85
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.xml.h479
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c2659
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.h251
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c1049
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h473
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_hfi.c541
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_hfi.h54
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_preempt.c477
-rw-r--r--drivers/gpu/drm/msm/adreno/a8xx_gpu.c1201
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_common.xml.h680
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c623
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h935
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h755
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h1446
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c610
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.h517
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h2316
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h453
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_12_0_sm8750.h494
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_12_2_glymur.h541
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h205
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h184
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h212
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h328
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h318
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h282
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h220
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h333
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h152
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h389
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h413
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_2_sm7150.h317
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h254
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h225
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h388
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h219
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h152
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h237
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h145
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h163
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h400
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h261
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h429
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h413
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h453
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h408
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h408
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h449
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c402
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.h113
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c359
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h84
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c1303
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h84
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c2135
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h165
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h236
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c559
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c413
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c697
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c866
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_formats.h67
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_blk.c139
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_blk.h45
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c1233
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h502
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c257
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h134
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c699
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h165
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c75
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h70
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c216
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h79
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c396
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c91
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h20
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c1460
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h205
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c518
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h96
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c299
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h53
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h272
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c65
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h22
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c187
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h95
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c657
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h225
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c205
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h37
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c207
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h81
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c57
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h14
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c249
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h83
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h31
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c187
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.h40
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c1258
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h150
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c324
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c2280
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h115
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c931
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h123
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h298
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c112
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.h22
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c144
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.h32
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4.xml.h1174
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c64
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c35
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c37
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_irq.c9
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c344
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h28
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c137
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c120
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c88
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c181
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5.xml.h1968
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c340
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h25
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cmd_encoder.c64
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c96
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c26
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h1
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c71
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_irq.c11
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c389
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h24
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c279
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c23
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h8
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c26
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h6
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c379
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c41
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h7
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_common.xml.h104
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_format.c632
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_format.h77
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_kms.h18
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot.c138
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot.h145
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c193
-rw-r--r--drivers/gpu/drm/msm/dp/dp_audio.c655
-rw-r--r--drivers/gpu/drm/msm/dp/dp_audio.h55
-rw-r--r--drivers/gpu/drm/msm/dp/dp_aux.c615
-rw-r--r--drivers/gpu/drm/msm/dp/dp_aux.h37
-rw-r--r--drivers/gpu/drm/msm/dp/dp_catalog.c1055
-rw-r--r--drivers/gpu/drm/msm/dp/dp_catalog.h132
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.c1736
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.h55
-rw-r--r--drivers/gpu/drm/msm/dp/dp_debug.c465
-rw-r--r--drivers/gpu/drm/msm/dp/dp_debug.h54
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c1798
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.h37
-rw-r--r--drivers/gpu/drm/msm/dp/dp_drm.c407
-rw-r--r--drivers/gpu/drm/msm/dp/dp_drm.h33
-rw-r--r--drivers/gpu/drm/msm/dp/dp_hpd.c69
-rw-r--r--drivers/gpu/drm/msm/dp/dp_hpd.h80
-rw-r--r--drivers/gpu/drm/msm/dp/dp_link.c805
-rw-r--r--drivers/gpu/drm/msm/dp/dp_link.h91
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.c716
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.h74
-rw-r--r--drivers/gpu/drm/msm/dp/dp_parser.c293
-rw-r--r--drivers/gpu/drm/msm/dp/dp_parser.h136
-rw-r--r--drivers/gpu/drm/msm/dp/dp_power.c408
-rw-r--r--drivers/gpu/drm/msm/dp/dp_power.h107
-rw-r--r--drivers/gpu/drm/msm/dp/dp_reg.h58
-rw-r--r--drivers/gpu/drm/msm/dp/dp_utils.c88
-rw-r--r--drivers/gpu/drm/msm/dp/dp_utils.h36
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.c173
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.h141
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.xml.h2313
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.c302
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.h19
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_host.c1281
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_manager.c644
-rw-r--r--drivers/gpu/drm/msm/dsi/mmss_cc.xml.h124
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.c411
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.h75
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c957
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c1110
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c136
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c944
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c624
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c1428
-rw-r--r--drivers/gpu/drm/msm/dsi/pll/dsi_pll.c184
-rw-r--r--drivers/gpu/drm/msm/dsi/pll/dsi_pll.h130
-rw-r--r--drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c881
-rw-r--r--drivers/gpu/drm/msm/dsi/pll/dsi_pll_14nm.c1096
-rw-r--r--drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm.c643
-rw-r--r--drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c526
-rw-r--r--drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c912
-rw-r--r--drivers/gpu/drm/msm/dsi/sfpb.xml.h63
-rw-r--r--drivers/gpu/drm/msm/edp/edp.c198
-rw-r--r--drivers/gpu/drm/msm/edp/edp.h78
-rw-r--r--drivers/gpu/drm/msm/edp/edp.xml.h380
-rw-r--r--drivers/gpu/drm/msm/edp/edp_aux.c264
-rw-r--r--drivers/gpu/drm/msm/edp/edp_bridge.c111
-rw-r--r--drivers/gpu/drm/msm/edp/edp_connector.c132
-rw-r--r--drivers/gpu/drm/msm/edp/edp_ctrl.c1375
-rw-r--r--drivers/gpu/drm/msm/edp/edp_phy.c98
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.c646
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.h113
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.xml.h1370
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_audio.c199
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_bridge.c472
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_connector.c451
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c2
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_hpd.c215
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_i2c.c15
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy.c53
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c58
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c765
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c12
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c39
-rw-r--r--drivers/gpu/drm/msm/hdmi/qfprom.xml.h54
-rw-r--r--drivers/gpu/drm/msm/msm_atomic.c89
-rw-r--r--drivers/gpu/drm/msm/msm_debugfs.c283
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c1266
-rw-r--r--drivers/gpu/drm/msm/msm_drv.h599
-rw-r--r--drivers/gpu/drm/msm/msm_dsc_helper.h27
-rw-r--r--drivers/gpu/drm/msm/msm_fb.c116
-rw-r--r--drivers/gpu/drm/msm/msm_fbdev.c194
-rw-r--r--drivers/gpu/drm/msm/msm_fence.c171
-rw-r--r--drivers/gpu/drm/msm/msm_fence.h82
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c1200
-rw-r--r--drivers/gpu/drm/msm/msm_gem.h428
-rw-r--r--drivers/gpu/drm/msm/msm_gem_prime.c100
-rw-r--r--drivers/gpu/drm/msm/msm_gem_shrinker.c291
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c870
-rw-r--r--drivers/gpu/drm/msm/msm_gem_vma.c1630
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c693
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h523
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_devfreq.c375
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_trace.h77
-rw-r--r--drivers/gpu/drm/msm/msm_gpummu.c116
-rw-r--r--drivers/gpu/drm/msm/msm_io_utils.c161
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c571
-rw-r--r--drivers/gpu/drm/msm/msm_kms.c388
-rw-r--r--drivers/gpu/drm/msm/msm_kms.h121
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c605
-rw-r--r--drivers/gpu/drm/msm/msm_mmu.h56
-rw-r--r--drivers/gpu/drm/msm/msm_perf.c10
-rw-r--r--drivers/gpu/drm/msm/msm_rd.c112
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.c80
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.h61
-rw-r--r--drivers/gpu/drm/msm/msm_submitqueue.c219
-rw-r--r--drivers/gpu/drm/msm/msm_syncobj.c172
-rw-r--r--drivers/gpu/drm/msm/msm_syncobj.h37
-rw-r--r--drivers/gpu/drm/msm/registers/.gitignore4
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a2xx.xml1865
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a3xx.xml1751
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a4xx.xml2409
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a5xx.xml3039
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx.xml5021
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml158
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml429
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml267
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.xml600
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a7xx_enums.xml216
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.xml1030
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a8xx_descriptors.xml121
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a8xx_enums.xml299
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/adreno_common.xml412
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml2445
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi.xml390
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_10nm.xml102
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_14nm.xml135
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_20nm.xml100
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_28nm.xml180
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_28nm_8960.xml134
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml261
-rw-r--r--drivers/gpu/drm/msm/registers/display/edp.xml239
-rw-r--r--drivers/gpu/drm/msm/registers/display/hdmi.xml1104
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp4.xml504
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp5.xml790
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp_common.xml90
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdss.xml38
-rw-r--r--drivers/gpu/drm/msm/registers/display/msm.xml32
-rw-r--r--drivers/gpu/drm/msm/registers/display/sfpb.xml17
-rw-r--r--drivers/gpu/drm/msm/registers/freedreno_copyright.xml40
-rw-r--r--drivers/gpu/drm/msm/registers/gen_header.py1007
-rw-r--r--drivers/gpu/drm/msm/registers/rules-fd.xsd404
-rw-r--r--drivers/gpu/drm/mxsfb/Kconfig23
-rw-r--r--drivers/gpu/drm/mxsfb/Makefile2
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_drv.c381
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_drv.h42
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_kms.c780
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_regs.h265
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_drv.c178
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_drv.h6
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_kms.c369
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_regs.h10
-rw-r--r--drivers/gpu/drm/nouveau/Kbuild20
-rw-r--r--drivers/gpu/drm/nouveau/Kconfig45
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/crtc.c167
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/dac.c4
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/dfp.c21
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/disp.c28
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/disp.h9
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/hw.c9
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/Kbuild5
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_drv.c (renamed from drivers/gpu/drm/i2c/ch7006_drv.c)50
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_mode.c (renamed from drivers/gpu/drm/i2c/ch7006_mode.c)8
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_priv.h (renamed from drivers/gpu/drm/i2c/ch7006_priv.h)12
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/sil164_drv.c (renamed from drivers/gpu/drm/i2c/sil164_drv.c)46
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/nouveau_i2c_encoder.c145
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/overlay.c13
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvmodesnv17.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv04.c30
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv17.c25
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/Kbuild5
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/atom.h6
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base507c.c43
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base827c.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base907c.c16
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base917c.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core.h6
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core507d.c13
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/corec37d.c9
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/corec57d.c6
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/coreca7d.c122
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc.c98
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc.h8
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc907d.c12
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc37d.c49
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc37d.h40
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc57d.c58
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcca7d.c98
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs507a.c51
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/dac507d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/dac907d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c1467
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.h19
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.c89
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.h9
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head507d.c24
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head827d.c10
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head907d.c32
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head917d.c7
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headc37d.c19
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headc57d.c25
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headca7d.c297
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/oimm507b.c6
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly507e.c28
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly827e.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly907e.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/pior507d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sor507d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sor907d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sorc37d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/tile.h63
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wimm.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c14
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndw.c223
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndw.h17
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c43
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c19
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c242
-rw-r--r--drivers/gpu/drm/nouveau/gv100_fence.c98
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/ch7006.h87
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/encoder_i2c.h220
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/sil164.h64
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/acr.h85
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/hs.h32
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/ls.h51
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/sec2.h45
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl907d.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h137
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc57d.h69
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc97b.h22
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clca7d.h868
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clca7e.h137
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/drf.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb100/dev_hshub_base.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb10b/dev_fbhub.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb202/dev_ce.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb202/dev_therm.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_falcon_v4.h20
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_fb.h15
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_fsp_pri.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_mmu.h173
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_riscv_pri.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_therm.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_xtl_ep_pri.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/pri_nv_xal_ep.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/chan.h76
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl0046.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl006b.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl0080.h15
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl506e.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl506f.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl5070.h100
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507a.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507b.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507c.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507d.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507e.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl826e.h15
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl826f.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl906f.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cla06f.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/class.h249
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clb069.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clc36f.h19
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clc37b.h11
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clc37e.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/client.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/conn.h38
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/device.h37
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/disp.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/driver.h7
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/event.h91
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/head.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0000.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0002.h39
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0003.h34
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0004.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000c.h27
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000e.h26
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0010.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0011.h33
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0012.h294
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0013.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0014.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0020.h45
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0021.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/ioctl.h98
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/list.h353
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/log.h51
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/notify.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/object.h32
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/os.h21
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/outp.h116
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/printf.h9
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push906f.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/pushc97b.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/vmm.h19
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/client.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/device.h34
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/engine.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/event.h66
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/falcon.h174
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/intr.h73
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/layout.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/memory.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/notify.h39
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/object.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/os.h34
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/pci.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h44
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h56
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h82
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h105
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/pm.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/acr.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dcb.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h8
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h21
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fsp.h24
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h484
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h7
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h36
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h66
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/vfn.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvrm/nvtypes.h26
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.c467
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.h61
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.c35
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.h4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_backlight.c265
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.c38
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c534
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.h73
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo0039.c10
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo5039.c8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo74c1.c4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo85b5.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo9039.c8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo90b5.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_boa0b5.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_chan.c508
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_chan.h38
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c409
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.h31
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_crtc.h7
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.c83
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.h15
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.c245
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.h8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.c102
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.h21
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c407
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dp.c381
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c607
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h177
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_encoder.h59
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.c408
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.h60
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.c612
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.h82
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c292
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.h20
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c212
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.h3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_hwmon.c143
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ioc32.c4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_led.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_led.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c103
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.h35
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_nvif.c38
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_platform.c36
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_prime.c38
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.c524
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.h118
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sgdma.c11
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c276
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.h3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ttm.c154
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ttm.h9
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_usif.c398
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_usif.h10
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c2005
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.h99
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vga.c32
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vga.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vmm.c11
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fbcon.c257
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fence.c2
-rw-r--r--drivers/gpu/drm/nouveau/nv10_fence.c8
-rw-r--r--drivers/gpu/drm/nouveau/nv17_fence.c27
-rw-r--r--drivers/gpu/drm/nouveau/nv50_display.h4
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fbcon.c299
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fence.c19
-rw-r--r--drivers/gpu/drm/nouveau/nv84_fence.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fbcon.c297
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fence.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvif/Kbuild11
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan.c159
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan506f.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan906f.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chanc36f.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvif/client.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvif/conn.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvif/device.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvif/disp.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvif/driver.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvif/event.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvif/head.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvif/mmu.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvif/notify.c210
-rw-r--r--drivers/gpu/drm/nouveau/nvif/object.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvif/outp.c556
-rw-r--r--drivers/gpu/drm/nouveau/nvif/user.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvif/vmm.c101
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/client.c197
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/engine.c85
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/enum.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/event.c162
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/firmware.c204
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/intr.c442
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/ioctl.c207
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/memory.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/notify.c163
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/object.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/oproxy.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/subdev.c117
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/uevent.c157
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c96
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gb202.c16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gv100.c24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/tu102.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/acpi.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c947
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c89
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/user.c108
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c366
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c108
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/capsgv100.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/chan.c251
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/chan.h139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/changv100.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c364
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h193
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c111
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c231
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c126
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coregv100.c205
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c234
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgv100.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c121
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c96
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgv100.c79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c719
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c313
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c343
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c122
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c1062
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c297
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c160
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c144
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c231
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c902
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagf119.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagv100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmig84.c91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigf119.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigk104.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigm200.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigt215.c91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c85
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c104
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.c105
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv04.c74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h125
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/nv04.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c1341
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h102
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c184
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c71
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c95
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c97
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c107
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h80
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c360
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c291
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c140
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c196
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c68
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c160
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c155
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c106
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c129
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c110
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c225
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c114
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c127
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c665
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/wimmgv100.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c184
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/falcon.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c501
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c255
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h76
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c660
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c263
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c276
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c111
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c226
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv10.c97
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv17.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv40.c254
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c92
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c254
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c626
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gb202.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c942
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c1506
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h168
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c105
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c109
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c95
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c308
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c358
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c244
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c83
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c251
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c344
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c103
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c198
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c382
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h232
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c430
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c473
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c125
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c418
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c224
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c80
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c358
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c509
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c203
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/ga102.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/tu102.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/base.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/tu102.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/Kbuild11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c868
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/g84.c165
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.c243
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gf108.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gf117.c80
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gk104.c184
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gt200.c157
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gt215.c138
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c123
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.h15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/nv50.c175
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h105
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c203
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c116
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/r535.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c26
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c22
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv04.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv10.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c26
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/base.c261
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/cmdq.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/fw.c363
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/ga100.c68
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/ga102.c154
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c350
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/gp102.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/msgq.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/priv.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/qmgr.h9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/tu102.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/v1.c235
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/acr.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/hs.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/ls.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/Kbuild4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c154
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga100.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga102.c330
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c205
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm20b.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp108.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp10b.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gv100.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c176
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/lsfw.c144
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/priv.h104
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu102.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c138
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowrom.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/gf100.c19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv31.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv50.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c320
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c185
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/base.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/g84.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/g98.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gt215.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/mcp89.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c126
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c110
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/r535.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c40
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c61
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/base.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gb100.c24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gb202.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gh100.c275
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/priv.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fuse/gm107.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/base.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/gk104.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c118
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c360
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c200
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb100.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb202.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gh100.c358
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/Kbuild19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ad10x.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/client.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/engine.c189
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/engine.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ga100.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ga1xx.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gb10x.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gb20x.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gh100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gpu.h70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gr.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gr.h55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/handles.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/nvdec.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/nvenc.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/Kbuild25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/alloc.c112
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/bar.c202
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ce.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/client.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ctrl.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/device.c148
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/disp.c1793
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fbsr.c327
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c617
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gr.c356
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c2205
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvdec.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvenc.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvjpg.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/alloc.h36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/bar.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ce.h15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/client.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ctrl.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/device.h30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/disp.h741
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/engine.h260
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/event.h47
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/fbsr.h106
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/fifo.h350
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/gr.h73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/gsp.h825
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/msgfn.h53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvdec.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvenc.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvjpg.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ofa.h16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/rpcfn.h225
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/vmm.h132
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ofa.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rm.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c698
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/Kbuild9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/client.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/disp.c263
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/fbsr.c149
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/fifo.c217
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gr.c191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gsp.c216
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/client.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/disp.h355
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/engine.h318
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/fbsr.h19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/fifo.h213
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/gr.h79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/gsp.h634
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/msgfn.h57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/ofa.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/rpcfn.h249
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/ofa.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/rm.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rm.h191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rpc.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/tu1xx.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c445
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu116.c61
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/anx9805.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c212
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxch.c215
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxch.h (renamed from drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h)0
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgf119.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padg94.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgf119.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgm200.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gh100.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/ga102.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c130
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c131
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c136
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gh100.c25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu102.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c247
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c212
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgh100.c306
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c100
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gh100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gk104.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv04.c25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv40.c25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv46.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv4c.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/pcie.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/gm200.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/fanpwm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gp100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/base.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/ga100.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/gv100.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/priv.h30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/r535.c57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/tu102.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/uvfn.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c4
-rw-r--r--drivers/gpu/drm/nova/Kconfig16
-rw-r--r--drivers/gpu/drm/nova/Makefile3
-rw-r--r--drivers/gpu/drm/nova/driver.rs71
-rw-r--r--drivers/gpu/drm/nova/file.rs69
-rw-r--r--drivers/gpu/drm/nova/gem.rs47
-rw-r--r--drivers/gpu/drm/nova/nova.rs17
-rw-r--r--drivers/gpu/drm/omapdrm/Kconfig10
-rw-r--r--drivers/gpu/drm/omapdrm/Makefile1
-rw-r--r--drivers/gpu/drm/omapdrm/dss/base.c28
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dispc.c213
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dpi.c13
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dsi.c70
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dss.c45
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dss.h18
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4.c60
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c14
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_cec.h14
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_core.c4
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5.c50
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5_core.c22
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5_core.h1
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi_phy.c5
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi_pll.c4
-rw-r--r--drivers/gpu/drm/omapdrm/dss/omapdss.h12
-rw-r--r--drivers/gpu/drm/omapdrm/dss/sdi.c31
-rw-r--r--drivers/gpu/drm/omapdrm/dss/venc.c40
-rw-r--r--drivers/gpu/drm/omapdrm/dss/video-pll.c8
-rw-r--r--drivers/gpu/drm/omapdrm/omap_crtc.c2
-rw-r--r--drivers/gpu/drm/omapdrm/omap_debugfs.c8
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_priv.h10
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_tiler.c31
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_tiler.h10
-rw-r--r--drivers/gpu/drm/omapdrm/omap_drv.c304
-rw-r--r--drivers/gpu/drm/omapdrm/omap_drv.h30
-rw-r--r--drivers/gpu/drm/omapdrm/omap_encoder.c4
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fb.c64
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fb.h9
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fbdev.c239
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fbdev.h15
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.c320
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.h6
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c51
-rw-r--r--drivers/gpu/drm/omapdrm/omap_irq.c18
-rw-r--r--drivers/gpu/drm/omapdrm/omap_overlay.c212
-rw-r--r--drivers/gpu/drm/omapdrm/omap_overlay.h35
-rw-r--r--drivers/gpu/drm/omapdrm/omap_plane.c414
-rw-r--r--drivers/gpu/drm/omapdrm/omap_plane.h1
-rw-r--r--drivers/gpu/drm/omapdrm/tcm-sita.c10
-rw-r--r--drivers/gpu/drm/panel/Kconfig756
-rw-r--r--drivers/gpu/drm/panel/Makefile63
-rw-r--r--drivers/gpu/drm/panel/panel-abt-y030xx067a.c66
-rw-r--r--drivers/gpu/drm/panel/panel-arm-versatile.c13
-rw-r--r--drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c189
-rw-r--r--drivers/gpu/drm/panel/panel-auo-a030jtn01.c307
-rw-r--r--drivers/gpu/drm/panel/panel-boe-bf060y8m-aj0.c401
-rw-r--r--drivers/gpu/drm/panel/panel-boe-himax8279d.c74
-rw-r--r--drivers/gpu/drm/panel/panel-boe-td4320.c247
-rw-r--r--drivers/gpu/drm/panel/panel-boe-th101mb31ig002-28a.c438
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-ll2.c241
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c1868
-rw-r--r--drivers/gpu/drm/panel/panel-dsi-cm.c63
-rw-r--r--drivers/gpu/drm/panel/panel-ebbg-ft8719.c247
-rw-r--r--drivers/gpu/drm/panel/panel-edp.c2217
-rw-r--r--drivers/gpu/drm/panel/panel-elida-kd35t133.c177
-rw-r--r--drivers/gpu/drm/panel/panel-feixin-k101-im2ba02.c29
-rw-r--r--drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c47
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx8279.c1296
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83102.c1088
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83112a.c347
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83112b.c430
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx8394.c845
-rw-r--r--drivers/gpu/drm/panel/panel-hydis-hv101hd1.c188
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9322.c27
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9341.c588
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9805.c405
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9806e.c565
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9881c.c2259
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9882t.c768
-rw-r--r--drivers/gpu/drm/panel/panel-innolux-ej030na.c309
-rw-r--r--drivers/gpu/drm/panel/panel-innolux-p079zca.c358
-rw-r--r--drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c1216
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-fhd-r63452.c244
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c475
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-lt070me05000.c98
-rw-r--r--drivers/gpu/drm/panel/panel-khadas-ts050.c1184
-rw-r--r--drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c74
-rw-r--r--drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c400
-rw-r--r--drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c310
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lb035q02.c14
-rw-r--r--drivers/gpu/drm/panel/panel-lg-ld070wx3.c184
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lg4573.c15
-rw-r--r--drivers/gpu/drm/panel/panel-lg-sw43408.c320
-rw-r--r--drivers/gpu/drm/panel/panel-lincolntech-lcd197.c261
-rw-r--r--drivers/gpu/drm/panel/panel-lvds.c90
-rw-r--r--drivers/gpu/drm/panel/panel-magnachip-d53e6ea8966.c520
-rw-r--r--drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c113
-rw-r--r--drivers/gpu/drm/panel/panel-nec-nl8048hl11.c14
-rw-r--r--drivers/gpu/drm/panel/panel-newvision-nv3051d.c541
-rw-r--r--drivers/gpu/drm/panel/panel-newvision-nv3052c.c686
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35510.c465
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35560.c480
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35950.c608
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36523.c1272
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36672a.c65
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36672e.c608
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt37801.c340
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt39016.c36
-rw-r--r--drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c62
-rw-r--r--drivers/gpu/drm/panel/panel-orisetech-ota5601a.c360
-rw-r--r--drivers/gpu/drm/panel/panel-orisetech-otm8009a.c124
-rw-r--r--drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c59
-rw-r--r--drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c71
-rw-r--r--drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c40
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm67191.c41
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm67200.c493
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm68200.c53
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm692e5.c373
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm69380.c309
-rw-r--r--drivers/gpu/drm/panel/panel-renesas-r61307.c325
-rw-r--r--drivers/gpu/drm/panel/panel-renesas-r69328.c281
-rw-r--r--drivers/gpu/drm/panel/panel-ronbo-rb070d30.c33
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ams581vf01.c283
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ams639rq08.c329
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-atna33xc20.c370
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-db7430.c348
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ld9040.c55
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d16d0.c21
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d27a1.c318
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c492
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c256
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3fc2x01.c385
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c31
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3ha8.c342
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c50
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c23
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c87
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0.c72
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0.h35
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams427ap24.c768
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams452ef01.c125
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c19
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e8aa5x01-ams561ra01.c981
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-sofef00.c240
-rw-r--r--drivers/gpu/drm/panel/panel-seiko-43wvf1g.c73
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-lq079l1sx01.c225
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c80
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls037v7dw01.c35
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c122
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls060t1sx01.c278
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c2987
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7701.c1372
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7703.c861
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7789v.c374
-rw-r--r--drivers/gpu/drm/panel/panel-sony-acx424akp.c521
-rw-r--r--drivers/gpu/drm/panel/panel-sony-acx565akm.c37
-rw-r--r--drivers/gpu/drm/panel/panel-sony-td4353-jdi.c257
-rw-r--r--drivers/gpu/drm/panel/panel-sony-tulip-truly-nt35521.c517
-rw-r--r--drivers/gpu/drm/panel/panel-startek-kd070fhfid015.c350
-rw-r--r--drivers/gpu/drm/panel/panel-summit.c134
-rw-r--r--drivers/gpu/drm/panel/panel-synaptics-r63353.c331
-rw-r--r--drivers/gpu/drm/panel/panel-synaptics-tddi.c277
-rw-r--r--drivers/gpu/drm/panel/panel-tdo-tl070wsh30.c27
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td028ttec1.c21
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td043mtea1.c32
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-tpg110.c23
-rw-r--r--drivers/gpu/drm/panel/panel-truly-nt35597.c34
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-g2647fb105.c280
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-r66451.c349
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm69299.c360
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm692e5.c442
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-vtdr6130.c325
-rw-r--r--drivers/gpu/drm/panel/panel-widechips-ws2401.c446
-rw-r--r--drivers/gpu/drm/panel/panel-xinpeng-xpp055c272.c255
-rw-r--r--drivers/gpu/drm/panfrost/Kconfig4
-rw-r--r--drivers/gpu/drm/panfrost/Makefile3
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.c129
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.h7
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.c381
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.h202
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c672
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_dump.c241
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_dump.h12
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_features.h229
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.c282
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.h78
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c41
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.c253
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.h6
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_issues.h21
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.c1020
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.h51
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c638
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.h9
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_perfcnt.c53
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_regs.h96
-rw-r--r--drivers/gpu/drm/panthor/Kconfig23
-rw-r--r--drivers/gpu/drm/panthor/Makefile16
-rw-r--r--drivers/gpu/drm/panthor/panthor_devfreq.c334
-rw-r--r--drivers/gpu/drm/panthor/panthor_devfreq.h23
-rw-r--r--drivers/gpu/drm/panthor/panthor_device.c612
-rw-r--r--drivers/gpu/drm/panthor/panthor_device.h548
-rw-r--r--drivers/gpu/drm/panthor/panthor_drv.c1765
-rw-r--r--drivers/gpu/drm/panthor/panthor_fw.c1506
-rw-r--r--drivers/gpu/drm/panthor/panthor_fw.h531
-rw-r--r--drivers/gpu/drm/panthor/panthor_gem.c455
-rw-r--r--drivers/gpu/drm/panthor/panthor_gem.h202
-rw-r--r--drivers/gpu/drm/panthor/panthor_gpu.c396
-rw-r--r--drivers/gpu/drm/panthor/panthor_gpu.h55
-rw-r--r--drivers/gpu/drm/panthor/panthor_heap.c632
-rw-r--r--drivers/gpu/drm/panthor/panthor_heap.h41
-rw-r--r--drivers/gpu/drm/panthor/panthor_hw.c224
-rw-r--r--drivers/gpu/drm/panthor/panthor_hw.h56
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.c2833
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.h106
-rw-r--r--drivers/gpu/drm/panthor/panthor_pwr.c549
-rw-r--r--drivers/gpu/drm/panthor/panthor_pwr.h23
-rw-r--r--drivers/gpu/drm/panthor/panthor_regs.h291
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.c4149
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.h57
-rw-r--r--drivers/gpu/drm/pl111/Kconfig6
-rw-r--r--drivers/gpu/drm/pl111/pl111_display.c32
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm.h4
-rw-r--r--drivers/gpu/drm/pl111/pl111_drv.c34
-rw-r--r--drivers/gpu/drm/pl111/pl111_versatile.c16
-rw-r--r--drivers/gpu/drm/qxl/Kconfig4
-rw-r--r--drivers/gpu/drm/qxl/qxl_cmd.c30
-rw-r--r--drivers/gpu/drm/qxl/qxl_debugfs.c19
-rw-r--r--drivers/gpu/drm/qxl/qxl_display.c433
-rw-r--r--drivers/gpu/drm/qxl/qxl_draw.c15
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.c75
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.h64
-rw-r--r--drivers/gpu/drm/qxl/qxl_dumb.c26
-rw-r--r--drivers/gpu/drm/qxl/qxl_gem.c30
-rw-r--r--drivers/gpu/drm/qxl/qxl_image.c4
-rw-r--r--drivers/gpu/drm/qxl/qxl_ioctl.c57
-rw-r--r--drivers/gpu/drm/qxl/qxl_irq.c11
-rw-r--r--drivers/gpu/drm/qxl/qxl_kms.c41
-rw-r--r--drivers/gpu/drm/qxl/qxl_object.c95
-rw-r--r--drivers/gpu/drm/qxl/qxl_object.h14
-rw-r--r--drivers/gpu/drm/qxl/qxl_prime.c18
-rw-r--r--drivers/gpu/drm/qxl/qxl_release.c164
-rw-r--r--drivers/gpu/drm/qxl/qxl_ttm.c106
-rw-r--r--drivers/gpu/drm/r128/Makefile10
-rw-r--r--drivers/gpu/drm/r128/ati_pcigart.c210
-rw-r--r--drivers/gpu/drm/r128/ati_pcigart.h31
-rw-r--r--drivers/gpu/drm/r128/r128_cce.c945
-rw-r--r--drivers/gpu/drm/r128/r128_drv.c114
-rw-r--r--drivers/gpu/drm/r128/r128_drv.h544
-rw-r--r--drivers/gpu/drm/r128/r128_ioc32.c199
-rw-r--r--drivers/gpu/drm/r128/r128_irq.c118
-rw-r--r--drivers/gpu/drm/r128/r128_state.c1640
-rw-r--r--drivers/gpu/drm/radeon/.gitignore2
-rw-r--r--drivers/gpu/drm/radeon/Kconfig38
-rw-r--r--drivers/gpu/drm/radeon/Makefile15
-rw-r--r--drivers/gpu/drm/radeon/atom-bits.h2
-rw-r--r--drivers/gpu/drm/radeon/atom.c70
-rw-r--r--drivers/gpu/drm/radeon/atom.h4
-rw-r--r--drivers/gpu/drm/radeon/atombios.h75
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c53
-rw-r--r--drivers/gpu/drm/radeon/atombios_dp.c30
-rw-r--r--drivers/gpu/drm/radeon/atombios_encoders.c144
-rw-r--r--drivers/gpu/drm/radeon/atombios_i2c.c2
-rw-r--r--drivers/gpu/drm/radeon/btc_dpm.c90
-rw-r--r--drivers/gpu/drm/radeon/cayman_blit_shaders.c320
-rw-r--r--drivers/gpu/drm/radeon/cayman_blit_shaders.h294
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.c116
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.h6
-rw-r--r--drivers/gpu/drm/radeon/cik.c146
-rw-r--r--drivers/gpu/drm/radeon/cik_blit_shaders.c246
-rw-r--r--drivers/gpu/drm/radeon/cik_blit_shaders.h219
-rw-r--r--drivers/gpu/drm/radeon/clearstate_cayman.h9
-rw-r--r--drivers/gpu/drm/radeon/clearstate_ci.h3
-rw-r--r--drivers/gpu/drm/radeon/clearstate_evergreen.h8
-rw-r--r--drivers/gpu/drm/radeon/clearstate_si.h3
-rw-r--r--drivers/gpu/drm/radeon/cypress_dpm.c8
-rw-r--r--drivers/gpu/drm/radeon/dce3_1_afmt.c1
-rw-r--r--drivers/gpu/drm/radeon/dce6_afmt.c3
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c55
-rw-r--r--drivers/gpu/drm/radeon/evergreen.h1
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_shaders.c303
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_shaders.h278
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c657
-rw-r--r--drivers/gpu/drm/radeon/evergreen_hdmi.c6
-rw-r--r--drivers/gpu/drm/radeon/evergreen_reg.h10
-rw-r--r--drivers/gpu/drm/radeon/evergreen_smc.h9
-rw-r--r--drivers/gpu/drm/radeon/kv_dpm.c11
-rw-r--r--drivers/gpu/drm/radeon/kv_smc.c2
-rw-r--r--drivers/gpu/drm/radeon/ni.c41
-rw-r--r--drivers/gpu/drm/radeon/ni_dpm.c163
-rw-r--r--drivers/gpu/drm/radeon/ni_dpm.h12
-rw-r--r--drivers/gpu/drm/radeon/nislands_smc.h95
-rw-r--r--drivers/gpu/drm/radeon/pptable.h12
-rw-r--r--drivers/gpu/drm/radeon/r100.c410
-rw-r--r--drivers/gpu/drm/radeon/r200.c34
-rw-r--r--drivers/gpu/drm/radeon/r300.c118
-rw-r--r--drivers/gpu/drm/radeon/r300_reg.h6
-rw-r--r--drivers/gpu/drm/radeon/r420.c38
-rw-r--r--drivers/gpu/drm/radeon/r520.c6
-rw-r--r--drivers/gpu/drm/radeon/r600.c53
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_shaders.c719
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_shaders.h38
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c465
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.c20
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.h3
-rw-r--r--drivers/gpu/drm/radeon/r600_hdmi.c24
-rw-r--r--drivers/gpu/drm/radeon/radeon.h180
-rw-r--r--drivers/gpu/drm/radeon/radeon_acpi.c15
-rw-r--r--drivers/gpu/drm/radeon/radeon_acpi.h9
-rw-r--r--drivers/gpu/drm/radeon/radeon_agp.c118
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c9
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h12
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c130
-rw-r--r--drivers/gpu/drm/radeon/radeon_atpx_handler.c26
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.c151
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.h10
-rw-r--r--drivers/gpu/drm/radeon/radeon_bios.c39
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c49
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c169
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c84
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c156
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c121
-rw-r--r--drivers/gpu/drm/radeon/radeon_dp_auxch.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_dp_mst.c768
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c257
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.h16
-rw-r--r--drivers/gpu/drm/radeon/radeon_encoders.c55
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c401
-rw-r--r--drivers/gpu/drm/radeon/radeon_fbdev.c293
-rw-r--r--drivers/gpu/drm/radeon/radeon_fence.c232
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c48
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c182
-rw-r--r--drivers/gpu/drm/radeon/radeon_i2c.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_ib.c40
-rw-r--r--drivers/gpu/drm/radeon/radeon_irq_kms.c73
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c109
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.h4
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_encoders.c50
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_tv.c7
-rw-r--r--drivers/gpu/drm/radeon/radeon_mn.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_mode.h95
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c129
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.h37
-rw-r--r--drivers/gpu/drm/radeon/radeon_pm.c112
-rw-r--r--drivers/gpu/drm/radeon/radeon_prime.c15
-rw-r--r--drivers/gpu/drm/radeon/radeon_ring.c82
-rw-r--r--drivers/gpu/drm/radeon/radeon_sa.c316
-rw-r--r--drivers/gpu/drm/radeon/radeon_semaphore.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_sync.c22
-rw-r--r--drivers/gpu/drm/radeon/radeon_test.c12
-rw-r--r--drivers/gpu/drm/radeon/radeon_trace.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c283
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.h1
-rw-r--r--drivers/gpu/drm/radeon/radeon_uvd.c47
-rw-r--r--drivers/gpu/drm/radeon/radeon_vce.c14
-rw-r--r--drivers/gpu/drm/radeon/radeon_vm.c26
-rw-r--r--drivers/gpu/drm/radeon/rs400.c56
-rw-r--r--drivers/gpu/drm/radeon/rs600.c38
-rw-r--r--drivers/gpu/drm/radeon/rs690.c6
-rw-r--r--drivers/gpu/drm/radeon/rv515.c66
-rw-r--r--drivers/gpu/drm/radeon/rv6xx_dpm.h3
-rw-r--r--drivers/gpu/drm/radeon/rv740_dpm.c8
-rw-r--r--drivers/gpu/drm/radeon/rv770.c50
-rw-r--r--drivers/gpu/drm/radeon/rv770_dpm.c4
-rw-r--r--drivers/gpu/drm/radeon/rv770_smc.c36
-rw-r--r--drivers/gpu/drm/radeon/rv770_smc.h27
-rw-r--r--drivers/gpu/drm/radeon/si.c158
-rw-r--r--drivers/gpu/drm/radeon/si_blit_shaders.c253
-rw-r--r--drivers/gpu/drm/radeon/si_blit_shaders.h223
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c314
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.h21
-rw-r--r--drivers/gpu/drm/radeon/sid.h2
-rw-r--r--drivers/gpu/drm/radeon/sislands_smc.h95
-rw-r--r--drivers/gpu/drm/radeon/smu7.h6
-rw-r--r--drivers/gpu/drm/radeon/smu7_discrete.h51
-rw-r--r--drivers/gpu/drm/radeon/smu7_fusion.h42
-rw-r--r--drivers/gpu/drm/radeon/sumo_dpm.c24
-rw-r--r--drivers/gpu/drm/radeon/trinity_dpm.c26
-rw-r--r--drivers/gpu/drm/radeon/trinity_dpm.h3
-rw-r--r--drivers/gpu/drm/radeon/uvd_v1_0.c2
-rw-r--r--drivers/gpu/drm/rcar-du/Kconfig52
-rw-r--r--drivers/gpu/drm/rcar-du/Makefile28
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_drv.c643
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_encoder.c130
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of.c323
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of.h20
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7790.dts69
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7791.dts43
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7793.dts43
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7795.dts43
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7796.dts43
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_vsp.c429
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_lvds.h32
-rw-r--r--drivers/gpu/drm/renesas/Kconfig5
-rw-r--r--drivers/gpu/drm/renesas/Makefile5
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/Kconfig77
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/Makefile16
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_cmm.c (renamed from drivers/gpu/drm/rcar-du/rcar_cmm.c)11
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_cmm.h (renamed from drivers/gpu/drm/rcar-du/rcar_cmm.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_crtc.c)138
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_crtc.h)13
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.c763
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_drv.h)33
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_encoder.c137
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_encoder.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_encoder.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_group.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_group.c)72
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_group.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_group.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_kms.c)120
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_kms.h)9
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_plane.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_plane.c)92
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_plane.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_plane.h)4
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_regs.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_regs.h)44
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_vsp.c537
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_vsp.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_vsp.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_writeback.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_writeback.c)12
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_writeback.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_writeback.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_dw_hdmi.c (renamed from drivers/gpu/drm/rcar-du/rcar_dw_hdmi.c)5
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c (renamed from drivers/gpu/drm/rcar-du/rcar_lvds.c)393
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds.h41
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds_regs.h (renamed from drivers/gpu/drm/rcar-du/rcar_lvds_regs.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c1350
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.h31
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h319
-rw-r--r--drivers/gpu/drm/renesas/rz-du/Kconfig28
-rw-r--r--drivers/gpu/drm/renesas/rz-du/Makefile10
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.c422
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.h89
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.c199
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.h78
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.c126
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.h32
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c479
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.h42
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.c355
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.h82
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c1084
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi_regs.h203
-rw-r--r--drivers/gpu/drm/renesas/shmobile/Kconfig16
-rw-r--r--drivers/gpu/drm/renesas/shmobile/Makefile7
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c622
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h46
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c297
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h52
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c189
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h32
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c328
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h20
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_regs.h (renamed from drivers/gpu/drm/shmobile/shmob_drm_regs.h)0
-rw-r--r--drivers/gpu/drm/rockchip/Kconfig62
-rw-r--r--drivers/gpu/drm/rockchip/Makefile8
-rw-r--r--drivers/gpu/drm/rockchip/analogix_dp-rockchip.c314
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-core.c335
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-core.h12
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-reg.c6
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-reg.h4
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c715
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c508
-rw-r--r--drivers/gpu/drm/rockchip/dw_dp-rockchip.c150
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c379
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c669
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.c1037
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.h354
-rw-r--r--drivers/gpu/drm/rockchip/rk3066_hdmi.c364
-rw-r--r--drivers/gpu/drm/rockchip/rk3066_hdmi.h2
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.c235
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.h55
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fb.c70
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fb.h8
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c163
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h24
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_gem.c97
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_gem.h14
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.c389
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.h39
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.c2811
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.h866
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_lvds.c194
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_lvds.h23
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_rgb.c53
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_rgb.h8
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop2_reg.c2612
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop_reg.c310
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop_reg.h7
-rw-r--r--drivers/gpu/drm/savage/Makefile9
-rw-r--r--drivers/gpu/drm/savage/savage_bci.c1080
-rw-r--r--drivers/gpu/drm/savage/savage_drv.c91
-rw-r--r--drivers/gpu/drm/savage/savage_drv.h580
-rw-r--r--drivers/gpu/drm/savage/savage_state.c1169
-rw-r--r--drivers/gpu/drm/scheduler/.kunitconfig12
-rw-r--r--drivers/gpu/drm/scheduler/Makefile2
-rw-r--r--drivers/gpu/drm/scheduler/gpu_scheduler_trace.h140
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c536
-rw-r--r--drivers/gpu/drm/scheduler/sched_fence.c134
-rw-r--r--drivers/gpu/drm/scheduler/sched_internal.h91
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c1262
-rw-r--r--drivers/gpu/drm/scheduler/tests/Makefile7
-rw-r--r--drivers/gpu/drm/scheduler/tests/mock_scheduler.c370
-rw-r--r--drivers/gpu/drm/scheduler/tests/sched_tests.h224
-rw-r--r--drivers/gpu/drm/scheduler/tests/tests_basic.c563
-rw-r--r--drivers/gpu/drm/selftests/Makefile7
-rw-r--r--drivers/gpu/drm/selftests/drm_cmdline_selftests.h68
-rw-r--r--drivers/gpu/drm/selftests/drm_mm_selftests.h28
-rw-r--r--drivers/gpu/drm/selftests/drm_modeset_selftests.h40
-rw-r--r--drivers/gpu/drm/selftests/drm_selftest.c109
-rw-r--r--drivers/gpu/drm/selftests/drm_selftest.h41
-rw-r--r--drivers/gpu/drm/selftests/test-drm_cmdline_parser.c1141
-rw-r--r--drivers/gpu/drm/selftests/test-drm_damage_helper.c811
-rw-r--r--drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c273
-rw-r--r--drivers/gpu/drm/selftests/test-drm_format.c280
-rw-r--r--drivers/gpu/drm/selftests/test-drm_framebuffer.c350
-rw-r--r--drivers/gpu/drm/selftests/test-drm_mm.c2487
-rw-r--r--drivers/gpu/drm/selftests/test-drm_modeset_common.c32
-rw-r--r--drivers/gpu/drm/selftests/test-drm_modeset_common.h52
-rw-r--r--drivers/gpu/drm/selftests/test-drm_plane_helper.c219
-rw-r--r--drivers/gpu/drm/selftests/test-drm_rect.c223
-rw-r--r--drivers/gpu/drm/shmobile/Kconfig13
-rw-r--r--drivers/gpu/drm/shmobile/Makefile8
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_backlight.c86
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_backlight.h19
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_crtc.c683
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_crtc.h55
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_drv.c300
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_drv.h41
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_kms.c150
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_kms.h29
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_plane.c259
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_plane.h19
-rw-r--r--drivers/gpu/drm/sis/Makefile10
-rw-r--r--drivers/gpu/drm/sis/sis_drv.c142
-rw-r--r--drivers/gpu/drm/sis/sis_drv.h80
-rw-r--r--drivers/gpu/drm/sis/sis_mm.c363
-rw-r--r--drivers/gpu/drm/sitronix/Kconfig42
-rw-r--r--drivers/gpu/drm/sitronix/Makefile3
-rw-r--r--drivers/gpu/drm/sitronix/st7571-i2c.c1083
-rw-r--r--drivers/gpu/drm/sitronix/st7586.c (renamed from drivers/gpu/drm/tiny/st7586.c)106
-rw-r--r--drivers/gpu/drm/sitronix/st7735r.c (renamed from drivers/gpu/drm/tiny/st7735r.c)38
-rw-r--r--drivers/gpu/drm/solomon/Kconfig32
-rw-r--r--drivers/gpu/drm/solomon/Makefile3
-rw-r--r--drivers/gpu/drm/solomon/ssd130x-i2c.c126
-rw-r--r--drivers/gpu/drm/solomon/ssd130x-spi.c193
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.c2040
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.h113
-rw-r--r--drivers/gpu/drm/sprd/Kconfig12
-rw-r--r--drivers/gpu/drm/sprd/Makefile8
-rw-r--r--drivers/gpu/drm/sprd/megacores_pll.c305
-rw-r--r--drivers/gpu/drm/sprd/sprd_dpu.c872
-rw-r--r--drivers/gpu/drm/sprd/sprd_dpu.h109
-rw-r--r--drivers/gpu/drm/sprd/sprd_drm.c199
-rw-r--r--drivers/gpu/drm/sprd/sprd_drm.h19
-rw-r--r--drivers/gpu/drm/sprd/sprd_dsi.c1066
-rw-r--r--drivers/gpu/drm/sprd/sprd_dsi.h126
-rw-r--r--drivers/gpu/drm/sti/Kconfig6
-rw-r--r--drivers/gpu/drm/sti/Makefile2
-rw-r--r--drivers/gpu/drm/sti/sti_compositor.c20
-rw-r--r--drivers/gpu/drm/sti/sti_crtc.c1
-rw-r--r--drivers/gpu/drm/sti/sti_cursor.c62
-rw-r--r--drivers/gpu/drm/sti/sti_drv.c54
-rw-r--r--drivers/gpu/drm/sti/sti_dvo.c61
-rw-r--r--drivers/gpu/drm/sti/sti_gdp.c105
-rw-r--r--drivers/gpu/drm/sti/sti_hda.c73
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi.c119
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi.h4
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi_tx3g4c28phy.c4
-rw-r--r--drivers/gpu/drm/sti/sti_hqvdp.c127
-rw-r--r--drivers/gpu/drm/sti/sti_mixer.c2
-rw-r--r--drivers/gpu/drm/sti/sti_plane.c12
-rw-r--r--drivers/gpu/drm/sti/sti_plane.h2
-rw-r--r--drivers/gpu/drm/sti/sti_tvout.c36
-rw-r--r--drivers/gpu/drm/sti/sti_vtg.c22
-rw-r--r--drivers/gpu/drm/stm/Kconfig18
-rw-r--r--drivers/gpu/drm/stm/Makefile2
-rw-r--r--drivers/gpu/drm/stm/drv.c60
-rw-r--r--drivers/gpu/drm/stm/dw_mipi_dsi-stm.c429
-rw-r--r--drivers/gpu/drm/stm/ltdc.c1478
-rw-r--r--drivers/gpu/drm/stm/ltdc.h29
-rw-r--r--drivers/gpu/drm/stm/lvds.c1222
-rw-r--r--drivers/gpu/drm/sun4i/Kconfig36
-rw-r--r--drivers/gpu/drm/sun4i/Makefile2
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_backend.c80
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_crtc.c3
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_dotclock.c206
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_drv.c63
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_framebuffer.c1
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_frontend.c40
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_frontend.h1
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi.h1
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c12
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c212
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c7
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_layer.c34
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_lvds.c4
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_rgb.c4
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c88
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.h1
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon_dclk.c208
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon_dclk.h (renamed from drivers/gpu/drm/sun4i/sun4i_dotclock.h)0
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tv.c203
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_drc.c4
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c100
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h7
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_csc.c120
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_csc.h21
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c121
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h20
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c373
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.c439
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.h118
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_tcon_top.c28
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_layer.c321
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_layer.h30
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_scaler.c44
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_scaler.h4
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_layer.c346
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_layer.h36
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_scaler.c49
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_scaler.h6
-rw-r--r--drivers/gpu/drm/sun4i/sunxi_engine.h40
-rw-r--r--drivers/gpu/drm/sysfb/Kconfig76
-rw-r--r--drivers/gpu/drm/sysfb/Makefile12
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb.c35
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_helper.h218
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_modeset.c608
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c104
-rw-r--r--drivers/gpu/drm/sysfb/efidrm.c390
-rw-r--r--drivers/gpu/drm/sysfb/ofdrm.c1145
-rw-r--r--drivers/gpu/drm/sysfb/simpledrm.c885
-rw-r--r--drivers/gpu/drm/sysfb/vesadrm.c652
-rw-r--r--drivers/gpu/drm/tdfx/Makefile8
-rw-r--r--drivers/gpu/drm/tdfx/tdfx_drv.c90
-rw-r--r--drivers/gpu/drm/tdfx/tdfx_drv.h47
-rw-r--r--drivers/gpu/drm/tegra/Kconfig13
-rw-r--r--drivers/gpu/drm/tegra/Makefile10
-rw-r--r--drivers/gpu/drm/tegra/dc.c908
-rw-r--r--drivers/gpu/drm/tegra/dc.h55
-rw-r--r--drivers/gpu/drm/tegra/dp.c80
-rw-r--r--drivers/gpu/drm/tegra/dp.h2
-rw-r--r--drivers/gpu/drm/tegra/dpaux.c69
-rw-r--r--drivers/gpu/drm/tegra/drm.c256
-rw-r--r--drivers/gpu/drm/tegra/drm.h72
-rw-r--r--drivers/gpu/drm/tegra/dsi.c126
-rw-r--r--drivers/gpu/drm/tegra/falcon.c30
-rw-r--r--drivers/gpu/drm/tegra/falcon.h2
-rw-r--r--drivers/gpu/drm/tegra/fb.c263
-rw-r--r--drivers/gpu/drm/tegra/fbdev.c143
-rw-r--r--drivers/gpu/drm/tegra/firewall.c257
-rw-r--r--drivers/gpu/drm/tegra/gem.c308
-rw-r--r--drivers/gpu/drm/tegra/gem.h29
-rw-r--r--drivers/gpu/drm/tegra/gr2d.c160
-rw-r--r--drivers/gpu/drm/tegra/gr3d.c336
-rw-r--r--drivers/gpu/drm/tegra/hdmi.c315
-rw-r--r--drivers/gpu/drm/tegra/hub.c305
-rw-r--r--drivers/gpu/drm/tegra/hub.h4
-rw-r--r--drivers/gpu/drm/tegra/nvdec.c578
-rw-r--r--drivers/gpu/drm/tegra/nvjpg.c330
-rw-r--r--drivers/gpu/drm/tegra/output.c59
-rw-r--r--drivers/gpu/drm/tegra/plane.c293
-rw-r--r--drivers/gpu/drm/tegra/plane.h21
-rw-r--r--drivers/gpu/drm/tegra/rgb.c109
-rw-r--r--drivers/gpu/drm/tegra/riscv.c106
-rw-r--r--drivers/gpu/drm/tegra/riscv.h30
-rw-r--r--drivers/gpu/drm/tegra/sor.c159
-rw-r--r--drivers/gpu/drm/tegra/submit.c684
-rw-r--r--drivers/gpu/drm/tegra/submit.h21
-rw-r--r--drivers/gpu/drm/tegra/uapi.c362
-rw-r--r--drivers/gpu/drm/tegra/uapi.h57
-rw-r--r--drivers/gpu/drm/tegra/vic.c292
-rw-r--r--drivers/gpu/drm/tegra/vic.h1
-rw-r--r--drivers/gpu/drm/tests/.kunitconfig5
-rw-r--r--drivers/gpu/drm/tests/Makefile30
-rw-r--r--drivers/gpu/drm/tests/drm_atomic_state_test.c379
-rw-r--r--drivers/gpu/drm/tests/drm_atomic_test.c153
-rw-r--r--drivers/gpu/drm/tests/drm_bridge_test.c521
-rw-r--r--drivers/gpu/drm/tests/drm_buddy_test.c893
-rw-r--r--drivers/gpu/drm/tests/drm_client_modeset_test.c200
-rw-r--r--drivers/gpu/drm/tests/drm_cmdline_parser_test.c1079
-rw-r--r--drivers/gpu/drm/tests/drm_connector_test.c1818
-rw-r--r--drivers/gpu/drm/tests/drm_damage_helper_test.c640
-rw-r--r--drivers/gpu/drm/tests/drm_dp_mst_helper_test.c577
-rw-r--r--drivers/gpu/drm/tests/drm_exec_test.c222
-rw-r--r--drivers/gpu/drm/tests/drm_fixp_test.c71
-rw-r--r--drivers/gpu/drm/tests/drm_format_helper_test.c1740
-rw-r--r--drivers/gpu/drm/tests/drm_format_test.c360
-rw-r--r--drivers/gpu/drm/tests/drm_framebuffer_test.c725
-rw-r--r--drivers/gpu/drm/tests/drm_gem_shmem_test.c385
-rw-r--r--drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c2334
-rw-r--r--drivers/gpu/drm/tests/drm_kunit_edid.h864
-rw-r--r--drivers/gpu/drm/tests/drm_kunit_helpers.c400
-rw-r--r--drivers/gpu/drm/tests/drm_managed_test.c117
-rw-r--r--drivers/gpu/drm/tests/drm_mm_test.c361
-rw-r--r--drivers/gpu/drm/tests/drm_modes_test.c208
-rw-r--r--drivers/gpu/drm/tests/drm_plane_helper_test.c319
-rw-r--r--drivers/gpu/drm/tests/drm_probe_helper_test.c217
-rw-r--r--drivers/gpu/drm/tests/drm_rect_test.c530
-rw-r--r--drivers/gpu/drm/tests/drm_sysfb_modeset_test.c168
-rw-r--r--drivers/gpu/drm/tidss/Kconfig6
-rw-r--r--drivers/gpu/drm/tidss/Makefile3
-rw-r--r--drivers/gpu/drm/tidss/tidss_crtc.c76
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc.c935
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc.h42
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc_regs.h107
-rw-r--r--drivers/gpu/drm/tidss/tidss_drv.c101
-rw-r--r--drivers/gpu/drm/tidss/tidss_drv.h16
-rw-r--r--drivers/gpu/drm/tidss/tidss_encoder.c147
-rw-r--r--drivers/gpu/drm/tidss/tidss_encoder.h5
-rw-r--r--drivers/gpu/drm/tidss/tidss_irq.c76
-rw-r--r--drivers/gpu/drm/tidss/tidss_irq.h8
-rw-r--r--drivers/gpu/drm/tidss/tidss_kms.c30
-rw-r--r--drivers/gpu/drm/tidss/tidss_oldi.c619
-rw-r--r--drivers/gpu/drm/tidss/tidss_oldi.h43
-rw-r--r--drivers/gpu/drm/tidss/tidss_plane.c109
-rw-r--r--drivers/gpu/drm/tidss/tidss_plane.h4
-rw-r--r--drivers/gpu/drm/tidss/tidss_scale_coefs.h2
-rw-r--r--drivers/gpu/drm/tilcdc/Kconfig4
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_crtc.c63
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_drv.c120
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_drv.h3
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_external.c12
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_panel.c14
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_plane.c56
-rw-r--r--drivers/gpu/drm/tiny/Kconfig146
-rw-r--r--drivers/gpu/drm/tiny/Makefile11
-rw-r--r--drivers/gpu/drm/tiny/appletbdrm.c834
-rw-r--r--drivers/gpu/drm/tiny/arcpgu.c435
-rw-r--r--drivers/gpu/drm/tiny/bochs.c840
-rw-r--r--drivers/gpu/drm/tiny/cirrus-qemu.c675
-rw-r--r--drivers/gpu/drm/tiny/cirrus.c661
-rw-r--r--drivers/gpu/drm/tiny/gm12u320.c102
-rw-r--r--drivers/gpu/drm/tiny/hx8357d.c31
-rw-r--r--drivers/gpu/drm/tiny/ili9163.c222
-rw-r--r--drivers/gpu/drm/tiny/ili9225.c80
-rw-r--r--drivers/gpu/drm/tiny/ili9341.c37
-rw-r--r--drivers/gpu/drm/tiny/ili9486.c56
-rw-r--r--drivers/gpu/drm/tiny/mi0283qt.c38
-rw-r--r--drivers/gpu/drm/tiny/panel-mipi-dbi.c458
-rw-r--r--drivers/gpu/drm/tiny/pixpaper.c1166
-rw-r--r--drivers/gpu/drm/tiny/repaper.c111
-rw-r--r--drivers/gpu/drm/tiny/sharp-memory.c669
-rw-r--r--drivers/gpu/drm/ttm/Makefile8
-rw-r--r--drivers/gpu/drm/ttm/tests/.kunitconfig3
-rw-r--r--drivers/gpu/drm/ttm/tests/Makefile11
-rw-r--r--drivers/gpu/drm/ttm/tests/TODO27
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_bo_test.c637
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c1176
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_device_test.c206
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c304
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h52
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_mock_manager.c238
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_mock_manager.h30
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_pool_test.c437
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_resource_test.c337
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_tt_test.c402
-rw-r--r--drivers/gpu/drm/ttm/ttm_agp_backend.c13
-rw-r--r--drivers/gpu/drm/ttm/ttm_backup.c182
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c1915
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_internal.h60
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c1021
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c395
-rw-r--r--drivers/gpu/drm/ttm/ttm_device.c322
-rw-r--r--drivers/gpu/drm/ttm/ttm_execbuf_util.c37
-rw-r--r--drivers/gpu/drm/ttm/ttm_memory.c683
-rw-r--r--drivers/gpu/drm/ttm/ttm_module.c104
-rw-r--r--drivers/gpu/drm/ttm/ttm_module.h11
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool.c1054
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool_internal.h25
-rw-r--r--drivers/gpu/drm/ttm/ttm_range_manager.c149
-rw-r--r--drivers/gpu/drm/ttm/ttm_resource.c895
-rw-r--r--drivers/gpu/drm/ttm/ttm_sys_manager.c49
-rw-r--r--drivers/gpu/drm/ttm/ttm_tt.c385
-rw-r--r--drivers/gpu/drm/tve200/Kconfig5
-rw-r--r--drivers/gpu/drm/tve200/tve200_display.c17
-rw-r--r--drivers/gpu/drm/tve200/tve200_drv.c41
-rw-r--r--drivers/gpu/drm/tyr/Kconfig19
-rw-r--r--drivers/gpu/drm/tyr/Makefile3
-rw-r--r--drivers/gpu/drm/tyr/driver.rs205
-rw-r--r--drivers/gpu/drm/tyr/file.rs56
-rw-r--r--drivers/gpu/drm/tyr/gem.rs18
-rw-r--r--drivers/gpu/drm/tyr/gpu.rs219
-rw-r--r--drivers/gpu/drm/tyr/regs.rs108
-rw-r--r--drivers/gpu/drm/tyr/tyr.rs22
-rw-r--r--drivers/gpu/drm/udl/Kconfig2
-rw-r--r--drivers/gpu/drm/udl/Makefile8
-rw-r--r--drivers/gpu/drm/udl/udl_connector.c138
-rw-r--r--drivers/gpu/drm/udl/udl_connector.h15
-rw-r--r--drivers/gpu/drm/udl/udl_drv.c34
-rw-r--r--drivers/gpu/drm/udl/udl_drv.h53
-rw-r--r--drivers/gpu/drm/udl/udl_edid.c81
-rw-r--r--drivers/gpu/drm/udl/udl_edid.h15
-rw-r--r--drivers/gpu/drm/udl/udl_main.c307
-rw-r--r--drivers/gpu/drm/udl/udl_modeset.c578
-rw-r--r--drivers/gpu/drm/udl/udl_proto.h68
-rw-r--r--drivers/gpu/drm/udl/udl_transfer.c61
-rw-r--r--drivers/gpu/drm/v3d/Kconfig5
-rw-r--r--drivers/gpu/drm/v3d/Makefile6
-rw-r--r--drivers/gpu/drm/v3d/v3d_bo.c124
-rw-r--r--drivers/gpu/drm/v3d/v3d_debugfs.c247
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.c240
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h363
-rw-r--r--drivers/gpu/drm/v3d/v3d_fence.c11
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c702
-rw-r--r--drivers/gpu/drm/v3d/v3d_gemfs.c62
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c202
-rw-r--r--drivers/gpu/drm/v3d/v3d_mmu.c95
-rw-r--r--drivers/gpu/drm/v3d/v3d_perfmon.c496
-rw-r--r--drivers/gpu/drm/v3d/v3d_performance_counters.h33
-rw-r--r--drivers/gpu/drm/v3d/v3d_regs.h143
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c758
-rw-r--r--drivers/gpu/drm/v3d/v3d_submit.c1407
-rw-r--r--drivers/gpu/drm/v3d/v3d_sysfs.c66
-rw-r--r--drivers/gpu/drm/v3d/v3d_trace.h57
-rw-r--r--drivers/gpu/drm/vboxvideo/Kconfig1
-rw-r--r--drivers/gpu/drm/vboxvideo/hgsmi_base.c58
-rw-r--r--drivers/gpu/drm/vboxvideo/modesetting.c20
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_drv.c62
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_drv.h3
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_irq.c17
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_main.c31
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_mode.c106
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_ttm.c23
-rw-r--r--drivers/gpu/drm/vboxvideo/vboxvideo.h6
-rw-r--r--drivers/gpu/drm/vboxvideo/vboxvideo_guest.h2
-rw-r--r--drivers/gpu/drm/vc4/Kconfig31
-rw-r--r--drivers/gpu/drm/vc4/Makefile7
-rw-r--r--drivers/gpu/drm/vc4/tests/.kunitconfig13
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock.c207
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock.h61
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c41
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_output.c176
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_plane.c25
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c1140
-rw-r--r--drivers/gpu/drm/vc4/vc4_bo.c170
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c770
-rw-r--r--drivers/gpu/drm/vc4/vc4_debugfs.c62
-rw-r--r--drivers/gpu/drm/vc4/vc4_dpi.c271
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.c254
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.h399
-rw-r--r--drivers/gpu/drm/vc4/vc4_dsi.c472
-rw-r--r--drivers/gpu/drm/vc4/vc4_gem.c326
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c2801
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.h129
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi_phy.c683
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi_regs.h277
-rw-r--r--drivers/gpu/drm/vc4/vc4_hvs.c1470
-rw-r--r--drivers/gpu/drm/vc4/vc4_irq.c76
-rw-r--r--drivers/gpu/drm/vc4/vc4_kms.c531
-rw-r--r--drivers/gpu/drm/vc4/vc4_perfmon.c70
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c1680
-rw-r--r--drivers/gpu/drm/vc4/vc4_regs.h402
-rw-r--r--drivers/gpu/drm/vc4/vc4_render_cl.c46
-rw-r--r--drivers/gpu/drm/vc4/vc4_trace.h95
-rw-r--r--drivers/gpu/drm/vc4/vc4_txp.c232
-rw-r--r--drivers/gpu/drm/vc4/vc4_v3d.c118
-rw-r--r--drivers/gpu/drm/vc4/vc4_validate.c69
-rw-r--r--drivers/gpu/drm/vc4/vc4_validate_shaders.c8
-rw-r--r--drivers/gpu/drm/vc4/vc4_vec.c680
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.c376
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.h11
-rw-r--r--drivers/gpu/drm/vgem/vgem_fence.c36
-rw-r--r--drivers/gpu/drm/via/Makefile8
-rw-r--r--drivers/gpu/drm/via/via_3d_reg.h1650
-rw-r--r--drivers/gpu/drm/via/via_dma.c745
-rw-r--r--drivers/gpu/drm/via/via_dmablit.c804
-rw-r--r--drivers/gpu/drm/via/via_dmablit.h140
-rw-r--r--drivers/gpu/drm/via/via_drv.c124
-rw-r--r--drivers/gpu/drm/via/via_drv.h229
-rw-r--r--drivers/gpu/drm/via/via_irq.c388
-rw-r--r--drivers/gpu/drm/via/via_map.c131
-rw-r--r--drivers/gpu/drm/via/via_mm.c242
-rw-r--r--drivers/gpu/drm/via/via_verifier.c1110
-rw-r--r--drivers/gpu/drm/via/via_verifier.h62
-rw-r--r--drivers/gpu/drm/via/via_video.c94
-rw-r--r--drivers/gpu/drm/virtio/Kconfig12
-rw-r--r--drivers/gpu/drm/virtio/Makefile2
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_debugfs.c6
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_display.c71
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.c147
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.h101
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_fence.c46
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_gem.c71
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_ioctl.c343
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_kms.c120
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_object.c125
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_plane.c362
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_prime.c223
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_submit.c542
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_trace.h26
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vq.c273
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vram.c72
-rw-r--r--drivers/gpu/drm/vkms/Kconfig32
-rw-r--r--drivers/gpu/drm/vkms/Makefile9
-rw-r--r--drivers/gpu/drm/vkms/tests/.kunitconfig4
-rw-r--r--drivers/gpu/drm/vkms/tests/Makefile8
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_color_test.c414
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_config_test.c1029
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_format_test.c279
-rw-r--r--drivers/gpu/drm/vkms/vkms_colorop.c120
-rw-r--r--drivers/gpu/drm/vkms/vkms_composer.c691
-rw-r--r--drivers/gpu/drm/vkms/vkms_composer.h28
-rw-r--r--drivers/gpu/drm/vkms/vkms_config.c649
-rw-r--r--drivers/gpu/drm/vkms/vkms_config.h489
-rw-r--r--drivers/gpu/drm/vkms/vkms_configfs.c843
-rw-r--r--drivers/gpu/drm/vkms/vkms_configfs.h8
-rw-r--r--drivers/gpu/drm/vkms/vkms_connector.c96
-rw-r--r--drivers/gpu/drm/vkms/vkms_connector.h35
-rw-r--r--drivers/gpu/drm/vkms/vkms_crtc.c142
-rw-r--r--drivers/gpu/drm/vkms/vkms_drv.c143
-rw-r--r--drivers/gpu/drm/vkms/vkms_drv.h281
-rw-r--r--drivers/gpu/drm/vkms/vkms_formats.c971
-rw-r--r--drivers/gpu/drm/vkms/vkms_formats.h21
-rw-r--r--drivers/gpu/drm/vkms/vkms_luts.c811
-rw-r--r--drivers/gpu/drm/vkms/vkms_luts.h12
-rw-r--r--drivers/gpu/drm/vkms/vkms_output.c174
-rw-r--r--drivers/gpu/drm/vkms/vkms_plane.c236
-rw-r--r--drivers/gpu/drm/vkms/vkms_writeback.c126
-rw-r--r--drivers/gpu/drm/vmwgfx/Kconfig21
-rw-r--r--drivers/gpu/drm/vmwgfx/Makefile8
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/includeCheck.h3
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_caps.h111
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h3682
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h799
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h3505
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h105
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h17
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h3128
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h3021
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_escape.h70
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h205
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_reg.h2824
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_types.h51
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h130
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/vmware_pack_begin.h2
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/vmware_pack_end.h2
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_lock.c194
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_lock.h218
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_object.c324
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_object.h95
-rw-r--r--drivers/gpu/drm/vmwgfx/vmw_surface_cache.h545
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_binding.c87
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_binding.h4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_blit.c162
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c1052
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.h238
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c164
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c83
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c102
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_context.c99
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c142
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c858
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h82
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c143
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.h50
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c1016
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h731
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c568
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fb.c845
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.c624
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.h26
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gem.c350
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c3
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c92
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c198
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_irq.c215
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c1613
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.h168
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c241
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h146
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_mob.c120
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg.c746
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg.h214
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h204
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg_x86.h42
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c50
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c239
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_prime.c40
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_reg.h4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource.c401
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h10
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c206
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_shader.c180
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c36
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_so.c37
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_so.h6
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c704
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c43
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c739
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c90
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_thp.c180
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c399
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c92
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_va.c8
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.c251
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.h88
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c633
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_vkms.h75
-rw-r--r--drivers/gpu/drm/xe/.gitignore4
-rw-r--r--drivers/gpu/drm/xe/.kunitconfig18
-rw-r--r--drivers/gpu/drm/xe/Kconfig141
-rw-r--r--drivers/gpu/drm/xe/Kconfig.debug114
-rw-r--r--drivers/gpu/drm/xe/Kconfig.profile55
-rw-r--r--drivers/gpu/drm/xe/Makefile384
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_command_header_abi.h46
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h39
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h44
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h100
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h281
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h279
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h659
-rw-r--r--drivers/gpu/drm/xe/abi/guc_capture_abi.h186
-rw-r--r--drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h129
-rw-r--r--drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h49
-rw-r--r--drivers/gpu/drm/xe/abi/guc_errors_abi.h101
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h428
-rw-r--r--drivers/gpu/drm/xe/abi/guc_log_abi.h75
-rw-r--r--drivers/gpu/drm/xe/abi/guc_messages_abi.h251
-rw-r--r--drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h247
-rw-r--r--drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h118
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h40
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gt/intel_gt_types.h11
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_active.h22
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_config.h19
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h37
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_gtt_view_types.h7
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h18
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h36
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h11
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_step.h14
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h162
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_uncore_trace.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h10
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h29
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h42
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h6
-rw-r--r--drivers/gpu/drm/xe/display/ext/i915_irq.c85
-rw-r--r--drivers/gpu/drm/xe/display/intel_bo.c103
-rw-r--r--drivers/gpu/drm/xe/display/intel_fb_bo.c91
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c91
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c560
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.h71
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_misc.c16
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_rpm.c74
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_rpm.h11
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_wa.c19
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c79
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c482
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c213
-rw-r--r--drivers/gpu/drm/xe/display/xe_panic.c102
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c319
-rw-r--r--drivers/gpu/drm/xe/display/xe_stolen.c123
-rw-r--r--drivers/gpu/drm/xe/display/xe_tdf.c15
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_alu_commands.h79
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h18
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h164
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gpu_commands.h77
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gsc_commands.h36
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_instr_defs.h35
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mfx_commands.h28
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mi_commands.h84
-rw-r--r--drivers/gpu/drm/xe/regs/xe_bars.h12
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h220
-rw-r--r--drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h29
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gsc_regs.h58
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h620
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gtt_defs.h37
-rw-r--r--drivers/gpu/drm/xe/regs/xe_guc_regs.h154
-rw-r--r--drivers/gpu/drm/xe/regs/xe_hw_error_regs.h20
-rw-r--r--drivers/gpu/drm/xe/regs/xe_i2c_regs.h23
-rw-r--r--drivers/gpu/drm/xe/regs/xe_irq_regs.h96
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h43
-rw-r--r--drivers/gpu/drm/xe/regs/xe_mchbar_regs.h48
-rw-r--r--drivers/gpu/drm/xe/regs/xe_oa_regs.h103
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pcode_regs.h27
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pmt.h35
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pxp_regs.h23
-rw-r--r--drivers/gpu/drm/xe/regs/xe_reg_defs.h137
-rw-r--r--drivers/gpu/drm/xe/regs/xe_regs.h65
-rw-r--r--drivers/gpu/drm/xe/tests/Makefile13
-rw-r--r--drivers/gpu/drm/xe/tests/xe_args_test.c221
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c637
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c301
-rw-r--r--drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c208
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c333
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c201
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c776
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c136
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_relay_test.c522
-rw-r--r--drivers/gpu/drm/xe/tests/xe_kunit_helpers.c129
-rw-r--r--drivers/gpu/drm/xe/tests/xe_kunit_helpers.h19
-rw-r--r--drivers/gpu/drm/xe/tests/xe_live_test_mod.c25
-rw-r--r--drivers/gpu/drm/xe/tests/xe_lmtt_test.c73
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c791
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs.c204
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci.c412
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.c76
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.h36
-rw-r--r--drivers/gpu/drm/xe/tests/xe_rtp_test.c548
-rw-r--r--drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c227
-rw-r--r--drivers/gpu/drm/xe/tests/xe_test.h63
-rw-r--r--drivers/gpu/drm/xe/tests/xe_test_mod.c10
-rw-r--r--drivers/gpu/drm/xe/tests/xe_wa_test.c73
-rw-r--r--drivers/gpu/drm/xe/xe_args.h143
-rw-r--r--drivers/gpu/drm/xe/xe_assert.h176
-rw-r--r--drivers/gpu/drm/xe/xe_bb.c148
-rw-r--r--drivers/gpu/drm/xe/xe_bb.h28
-rw-r--r--drivers/gpu/drm/xe/xe_bb_types.h20
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c3666
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h454
-rw-r--r--drivers/gpu/drm/xe/xe_bo_doc.h179
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c352
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.h21
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h113
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.c1291
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.h47
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c440
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.h17
-rw-r--r--drivers/gpu/drm/xe/xe_dep_job_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_dep_scheduler.c143
-rw-r--r--drivers/gpu/drm/xe/xe_dep_scheduler.h21
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c522
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.h35
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump_types.h93
-rw-r--r--drivers/gpu/drm/xe/xe_device.c1297
-rw-r--r--drivers/gpu/drm/xe/xe_device.h212
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.c296
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.h13
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h726
-rw-r--r--drivers/gpu/drm/xe/xe_device_wa_oob.rules5
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c369
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.h15
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c406
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.h70
-rw-r--r--drivers/gpu/drm/xe/xe_drv.h22
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c989
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.h25
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c367
-rw-r--r--drivers/gpu/drm/xe/xe_exec.h14
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c1251
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.h114
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h242
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c495
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.h21
-rw-r--r--drivers/gpu/drm/xe/xe_execlist_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.c261
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.h64
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake_types.h110
-rw-r--r--drivers/gpu/drm/xe/xe_gen_wa_oob.c212
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c1124
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.h61
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt_types.h85
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.c138
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.h120
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler_types.h57
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c640
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.h27
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_debugfs.c71
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.c535
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.h21
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_submit.c220
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_submit.h32
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_types.h74
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c1069
-rw-r--r--drivers/gpu/drm/xe/xe_gt.h146
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.c202
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.h24
-rw-r--r--drivers/gpu/drm/xe/xe_gt_clock.c88
-rw-r--r--drivers/gpu/drm/xe/xe_gt_clock.h16
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c379
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c304
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.h13
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.c416
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.h22
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle_types.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c888
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.h74
-rw-r--r--drivers/gpu/drm/xe/xe_gt_printk.h128
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.c283
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c2938
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h92
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h59
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c2157
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h137
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c622
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h35
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c1069
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h54
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c147
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h27
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h22
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c435
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h25
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c425
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h34
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h52
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h65
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_printk.h37
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c1377
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c72
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h81
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.c101
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.h26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs.c55
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs_types.h26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.c270
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.h17
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.c372
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.h56
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h370
-rw-r--r--drivers/gpu/drm/xe/xe_guard.h119
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c1719
-rw-r--r--drivers/gpu/drm/xe/xe_guc.h97
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c1013
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.h18
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf.c207
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf.h49
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c2044
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.h61
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture_types.h70
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c2148
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h86
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct_types.h173
-rw-r--r--drivers/gpu/drm/xe/xe_guc_db_mgr.c267
-rw-r--r--drivers/gpu/drm/xe/xe_guc_db_mgr.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.c146
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.c521
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity_types.h102
-rw-r--r--drivers/gpu/drm/xe/xe_guc_exec_queue_types.h71
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h366
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hwconfig.c202
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hwconfig.h20
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hxg_helpers.h108
-rw-r--r--drivers/gpu/drm/xe/xe_guc_id_mgr.c280
-rw-r--r--drivers/gpu/drm/xe/xe_guc_id_mgr.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_helpers.c148
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_helpers.h64
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h71
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h68
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.c375
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.h61
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log_types.h57
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.c95
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.h15
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c1439
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.h46
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay.c967
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay.h37
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2986
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h52
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit_types.h144
-rw-r--r--drivers/gpu/drm/xe/xe_guc_tlb_inval.c242
-rw-r--r--drivers/gpu/drm/xe/xe_guc_tlb_inval.h19
-rw-r--r--drivers/gpu/drm/xe/xe_guc_types.h129
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.c249
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.h40
-rw-r--r--drivers/gpu/drm/xe/xe_huc.c318
-rw-r--r--drivers/gpu/drm/xe/xe_huc.h28
-rw-r--r--drivers/gpu/drm/xe/xe_huc_debugfs.c71
-rw-r--r--drivers/gpu/drm/xe/xe_huc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_huc_types.h24
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.c1111
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.h82
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c685
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h43
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group.c344
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group.h29
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_types.h185
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.c182
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.h15
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.c268
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.h33
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c1334
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.h19
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.c372
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.h68
-rw-r--r--drivers/gpu/drm/xe/xe_irq.c1050
-rw-r--r--drivers/gpu/drm/xe/xe_irq.h26
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.c464
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.h17
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c573
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.h28
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_2l.c150
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_ml.c161
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_types.h63
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c2414
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h163
-rw-r--r--drivers/gpu/drm/xe/xe_lrc_types.h61
-rw-r--r--drivers/gpu/drm/xe/xe_macros.h22
-rw-r--r--drivers/gpu/drm/xe/xe_map.h93
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c547
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.h30
-rw-r--r--drivers/gpu/drm/xe/xe_memirq_types.h37
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c2470
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.h174
-rw-r--r--drivers/gpu/drm/xe/xe_migrate_doc.h88
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c410
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.h49
-rw-r--r--drivers/gpu/drm/xe/xe_mmio_gem.c226
-rw-r--r--drivers/gpu/drm/xe/xe_mmio_gem.h20
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.c838
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_module.c183
-rw-r--r--drivers/gpu/drm/xe/xe_module.h31
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.c170
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.h15
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c2816
-rw-r--r--drivers/gpu/drm/xe/xe_oa.h25
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h271
-rw-r--r--drivers/gpu/drm/xe/xe_observation.c106
-rw-r--r--drivers/gpu/drm/xe/xe_observation.h20
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.c444
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.h19
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault_types.h136
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c580
-rw-r--r--drivers/gpu/drm/xe/xe_pat.h61
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c1255
-rw-r--r--drivers/gpu/drm/xe/xe_pci.h15
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.c266
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.h21
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h82
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.c380
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.h45
-rw-r--r--drivers/gpu/drm/xe/xe_pcode_api.h94
-rw-r--r--drivers/gpu/drm/xe/xe_platform_types.h42
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c980
-rw-r--r--drivers/gpu/drm/xe/xe_pm.h57
-rw-r--r--drivers/gpu/drm/xe/xe_pmu.c600
-rw-r--r--drivers/gpu/drm/xe/xe_pmu.h18
-rw-r--r--drivers/gpu/drm/xe/xe_pmu_types.h39
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.c184
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.h61
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence_types.h34
-rw-r--r--drivers/gpu/drm/xe/xe_printk.h129
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.c294
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.h14
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c2565
-rw-r--r--drivers/gpu/drm/xe/xe_pt.h52
-rw-r--r--drivers/gpu/drm/xe/xe_pt_types.h124
-rw-r--r--drivers/gpu/drm/xe/xe_pt_walk.c161
-rw-r--r--drivers/gpu/drm/xe/xe_pt_walk.h152
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.c949
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.h35
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_debugfs.c129
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_debugfs.h13
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.c602
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.h22
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_types.h135
-rw-r--r--drivers/gpu/drm/xe/xe_query.c812
-rw-r--r--drivers/gpu/drm/xe/xe_query.h14
-rw-r--r--drivers/gpu/drm/xe/xe_range_fence.c161
-rw-r--r--drivers/gpu/drm/xe/xe_range_fence.h75
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.c216
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.h28
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c221
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.h23
-rw-r--r--drivers/gpu/drm/xe/xe_res_cursor.h356
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c525
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.h17
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops_types.h22
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c387
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.h506
-rw-r--r--drivers/gpu/drm/xe/xe_rtp_helpers.h83
-rw-r--r--drivers/gpu/drm/xe/xe_rtp_types.h131
-rw-r--r--drivers/gpu/drm/xe/xe_sa.c151
-rw-r--r--drivers/gpu/drm/xe/xe_sa.h72
-rw-r--r--drivers/gpu/drm/xe/xe_sa_types.h19
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c359
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.h95
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job_types.h79
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.c306
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c176
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.h48
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.c520
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.h30
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.c283
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.h31
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_control.c279
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_control.h22
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c395
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h18
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_helpers.h73
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.c365
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.h30
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h37
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.c438
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.h45
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service.c216
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service.h23
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c647
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_types.h70
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_printk.h46
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c211
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.h20
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.c480
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.h35
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_types.h47
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vfio.c80
-rw-r--r--drivers/gpu/drm/xe/xe_step.c259
-rw-r--r--drivers/gpu/drm/xe/xe_step.h23
-rw-r--r--drivers/gpu/drm/xe/xe_step_types.h76
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c377
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.h18
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode_types.h43
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c1537
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h389
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c406
-rw-r--r--drivers/gpu/drm/xe/xe_sync.h47
-rw-r--r--drivers/gpu/drm/xe/xe_sync_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_tile.c217
-rw-r--r--drivers/gpu/drm/xe/xe_tile.h26
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.c142
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_tile_printk.h127
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c253
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h15
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_printk.h33
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf.c350
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf.h23
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h23
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs.c61
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs_types.h27
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.c433
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.h46
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.c285
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.h34
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_types.h130
-rw-r--r--drivers/gpu/drm/xe/xe_trace.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h474
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.h263
-rw-r--r--drivers/gpu/drm/xe/xe_trace_guc.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_guc.h159
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.h52
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c351
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h21
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_sys_mgr.c120
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_sys_mgr.h13
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.c480
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.h46
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h48
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.c247
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.h19
-rw-r--r--drivers/gpu/drm/xe/xe_uc.c316
-rw-r--r--drivers/gpu/drm/xe/xe_uc.h25
-rw-r--r--drivers/gpu/drm/xe/xe_uc_debugfs.c30
-rw-r--r--drivers/gpu/drm/xe/xe_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c956
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.h193
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_abi.h405
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_types.h155
-rw-r--r--drivers/gpu/drm/xe/xe_uc_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.c322
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.h107
-rw-r--r--drivers/gpu/drm/xe/xe_validation.c278
-rw-r--r--drivers/gpu/drm/xe/xe_validation.h192
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c4410
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h414
-rw-r--r--drivers/gpu/drm/xe/xe_vm_doc.h555
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c431
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.h15
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h477
-rw-r--r--drivers/gpu/drm/xe/xe_vram.c462
-rw-r--r--drivers/gpu/drm/xe/xe_vram.h25
-rw-r--r--drivers/gpu/drm/xe/xe_vram_freq.c124
-rw-r--r--drivers/gpu/drm/xe/xe_vram_freq.h13
-rw-r--r--drivers/gpu/drm/xe/xe_vram_types.h85
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.c225
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.h15
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c1154
-rw-r--r--drivers/gpu/drm/xe/xe_wa.h53
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules78
-rw-r--r--drivers/gpu/drm/xe/xe_wait_user_fence.c183
-rw-r--r--drivers/gpu/drm/xe/xe_wait_user_fence.h15
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm.c273
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm.h16
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm_types.h26
-rw-r--r--drivers/gpu/drm/xen/Kconfig10
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.c38
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.h9
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_conn.c1
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_conn.h1
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_evtchnl.c43
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_gem.c117
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_gem.h13
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_kms.c7
-rw-r--r--drivers/gpu/drm/xlnx/Kconfig19
-rw-r--r--drivers/gpu/drm/xlnx/Makefile3
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp.c1083
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp.h41
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp_regs.h15
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp.c1422
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp.h11
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp_audio.c448
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dpsub.c295
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dpsub.h59
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_kms.c548
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_kms.h46
-rw-r--r--drivers/gpu/drm/zte/Kconfig11
-rw-r--r--drivers/gpu/drm/zte/Makefile10
-rw-r--r--drivers/gpu/drm/zte/zx_common_regs.h28
-rw-r--r--drivers/gpu/drm/zte/zx_drm_drv.c190
-rw-r--r--drivers/gpu/drm/zte/zx_drm_drv.h34
-rw-r--r--drivers/gpu/drm/zte/zx_hdmi.c760
-rw-r--r--drivers/gpu/drm/zte/zx_hdmi_regs.h66
-rw-r--r--drivers/gpu/drm/zte/zx_plane.c528
-rw-r--r--drivers/gpu/drm/zte/zx_plane.h26
-rw-r--r--drivers/gpu/drm/zte/zx_plane_regs.h120
-rw-r--r--drivers/gpu/drm/zte/zx_tvenc.c400
-rw-r--r--drivers/gpu/drm/zte/zx_tvenc_regs.h27
-rw-r--r--drivers/gpu/drm/zte/zx_vga.c527
-rw-r--r--drivers/gpu/drm/zte/zx_vga_regs.h33
-rw-r--r--drivers/gpu/drm/zte/zx_vou.c921
-rw-r--r--drivers/gpu/drm/zte/zx_vou.h64
-rw-r--r--drivers/gpu/drm/zte/zx_vou_regs.h212
6787 files changed, 4013164 insertions, 472001 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index e392a90ca687..7e6bc0b3a589 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -10,14 +10,13 @@ menuconfig DRM
depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && HAS_DMA
select DRM_PANEL_ORIENTATION_QUIRKS
select HDMI
- select FB_CMDLINE
select I2C
- select I2C_ALGOBIT
select DMA_SHARED_BUFFER
select SYNC_FILE
# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
# device and dmabuf fd. Let's make sure that is available for our userspace.
select KCMP
+ select VIDEO
help
Kernel-level support for the Direct Rendering Infrastructure (DRI)
introduced in XFree86 4.0. If you say Y here, you need to select
@@ -27,81 +26,115 @@ menuconfig DRM
details. You should also select and configure AGP
(/dev/agpgart) support if it is available for your platform.
+menu "DRM debugging options"
+depends on DRM
+source "drivers/gpu/drm/Kconfig.debug"
+endmenu
+
+if DRM
+
config DRM_MIPI_DBI
tristate
depends on DRM
+ select DRM_KMS_HELPER
config DRM_MIPI_DSI
bool
depends on DRM
-config DRM_DP_AUX_CHARDEV
- bool "DRM DP AUX Interface"
+config DRM_KMS_HELPER
+ tristate
depends on DRM
+ select FB_CORE if DRM_FBDEV_EMULATION
help
- Choose this option to enable a /dev/drm_dp_auxN node that allows to
- read and write values to arbitrary DPCD registers on the DP aux
- channel.
-
-config DRM_DEBUG_MM
- bool "Insert extra checks and debug info into the DRM range managers"
- default n
- depends on DRM=y
- depends on STACKTRACE_SUPPORT
- select STACKDEPOT
- help
- Enable allocation tracking of memory manager and leak detection on
- shutdown.
-
- Recommended for driver developers only.
+ CRTC helpers for KMS drivers.
- If in doubt, say "N".
+config DRM_DRAW
+ bool
+ depends on DRM
-config DRM_DEBUG_SELFTEST
- tristate "kselftests for DRM"
+config DRM_PANIC
+ bool "Display a user-friendly message when a kernel panic occurs"
depends on DRM
- depends on DEBUG_KERNEL
- select PRIME_NUMBERS
- select DRM_LIB_RANDOM
- select DRM_KMS_HELPER
- select DRM_EXPORT_FOR_TESTS if m
- default n
+ select FONT_SUPPORT
+ select DRM_DRAW
help
- This option provides kernel modules that can be used to run
- various selftests on parts of the DRM api. This option is not
- useful for distributions or general kernels, but only for kernel
- developers working on DRM and associated drivers.
-
+ Enable a drm panic handler, which will display a user-friendly message
+ when a kernel panic occurs. It's useful when using a user-space
+ console instead of fbcon.
+ It will only work if your graphic driver supports this feature.
+ To support Hi-DPI Display, you can enable bigger fonts like
+ FONT_TER16x32
+
+config DRM_PANIC_FOREGROUND_COLOR
+ hex "Drm panic screen foreground color, in RGB"
+ depends on DRM_PANIC
+ default 0xffffff
+
+config DRM_PANIC_BACKGROUND_COLOR
+ hex "Drm panic screen background color, in RGB"
+ depends on DRM_PANIC
+ default 0x000000
+
+config DRM_PANIC_DEBUG
+ bool "Add a debug fs entry to trigger drm_panic"
+ depends on DRM_PANIC && DEBUG_FS
+ help
+ Add dri/[device]/drm_panic_plane_x in the kernel debugfs, to force the
+ panic handler to write the panic message to this plane scanout buffer.
+ This is unsafe and should not be enabled on a production build.
If in doubt, say "N".
-config DRM_KMS_HELPER
- tristate
- depends on DRM
+config DRM_PANIC_SCREEN
+ string "Panic screen formatter"
+ default "user"
+ depends on DRM_PANIC
help
- CRTC helpers for KMS drivers.
-
-config DRM_KMS_FB_HELPER
- bool
- depends on DRM_KMS_HELPER
- select FB
- select FRAMEBUFFER_CONSOLE if !EXPERT
- select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE
- select FB_SYS_FOPS
- select FB_SYS_FILLRECT
- select FB_SYS_COPYAREA
- select FB_SYS_IMAGEBLIT
- select FB_CFB_FILLRECT
- select FB_CFB_COPYAREA
- select FB_CFB_IMAGEBLIT
- select FB_DEFERRED_IO
+ This option enable to choose what will be displayed when a kernel
+ panic occurs. You can choose between "user", a short message telling
+ the user to reboot the system, or "kmsg" which will display the last
+ lines of kmsg.
+ This can also be overridden by drm.panic_screen=xxxx kernel parameter
+ or by writing to /sys/module/drm/parameters/panic_screen sysfs entry
+ Default is "user"
+
+config DRM_PANIC_SCREEN_QR_CODE
+ bool "Add a panic screen with a QR code"
+ depends on DRM_PANIC && RUST
+ select ZLIB_DEFLATE
+ help
+ This option adds a QR code generator, and a panic screen with a QR
+ code. The QR code will contain the last lines of kmsg and other debug
+ information. This should be easier for the user to report a kernel
+ panic, with all debug information available.
+ To use this panic screen, also set DRM_PANIC_SCREEN to "qr_code"
+
+config DRM_PANIC_SCREEN_QR_CODE_URL
+ string "Base URL of the QR code in the panic screen"
+ depends on DRM_PANIC_SCREEN_QR_CODE
help
- FBDEV helpers for KMS drivers.
+ This option sets the base URL to report the kernel panic. If it's set
+ the QR code will contain the URL and the kmsg compressed with zlib as
+ a URL parameter. If it's empty, the QR code will contain the kmsg as
+ uncompressed text only.
+ There is a demo code in javascript, to decode and uncompress the kmsg
+ data from the URL parameter at https://github.com/kdj0c/panic_report
+
+config DRM_PANIC_SCREEN_QR_VERSION
+ int "Maximum version (size) of the QR code."
+ depends on DRM_PANIC_SCREEN_QR_CODE
+ default 40
+ help
+ This option limits the version (or size) of the QR code. QR code
+ version ranges from Version 1 (21x21) to Version 40 (177x177).
+ Smaller QR code are easier to read, but will contain less debugging
+ data. Default is 40.
config DRM_DEBUG_DP_MST_TOPOLOGY_REFS
bool "Enable refcount backtrace history in the DP MST helpers"
depends on STACKTRACE_SUPPORT
select STACKDEPOT
- depends on DRM_KMS_HELPER
+ select DRM_KMS_HELPER
depends on DEBUG_KERNEL
depends on EXPERT
help
@@ -112,47 +145,31 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS
This has the potential to use a lot of memory and print some very
large kernel messages. If in doubt, say "N".
-config DRM_FBDEV_EMULATION
- bool "Enable legacy fbdev support for your modesetting driver"
- depends on DRM
- select DRM_KMS_HELPER
- select DRM_KMS_FB_HELPER
- default y
+config DRM_DEBUG_MODESET_LOCK
+ bool "Enable backtrace history for lock contention"
+ depends on STACKTRACE_SUPPORT
+ depends on DEBUG_KERNEL
+ depends on EXPERT
+ select STACKDEPOT
+ default y if DEBUG_WW_MUTEX_SLOWPATH
help
- Choose this option if you have a need for the legacy fbdev
- support. Note that this support also provides the linux console
- support on top of your modesetting driver.
+ Enable debug tracing of failures to gracefully handle drm modeset lock
+ contention. A history of each drm modeset lock path hitting -EDEADLK
+ will be saved until gracefully handled, and the backtrace will be
+ printed when attempting to lock a contended lock.
- If in doubt, say "Y".
-
-config DRM_FBDEV_OVERALLOC
- int "Overallocation of the fbdev buffer"
- depends on DRM_FBDEV_EMULATION
- default 100
- help
- Defines the fbdev buffer overallocation in percent. Default
- is 100. Typical values for double buffering will be 200,
- triple buffering 300.
+ If in doubt, say "N".
-config DRM_FBDEV_LEAK_PHYS_SMEM
- bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)"
- depends on DRM_FBDEV_EMULATION && EXPERT
- default n
+config DRM_CLIENT
+ bool
+ depends on DRM
help
- In order to keep user-space compatibility, we want in certain
- use-cases to keep leaking the fbdev physical address to the
- user-space program handling the fbdev buffer.
- This affects, not only, Amlogic, Allwinner or Rockchip devices
- with ARM Mali GPUs using an userspace Blob.
- This option is not supported by upstream developers and should be
- removed as soon as possible and be considered as a broken and
- legacy behaviour from a modern fbdev device driver.
+ Enables support for DRM clients. DRM drivers that need
+ struct drm_client_dev and its interfaces should select this
+ option. Drivers that support the default clients should
+ select DRM_CLIENT_SELECTION instead.
- Please send any bug reports when using this to your proprietary
- software vendor that requires this.
-
- If in doubt, say "N" or spread the word to your closed source
- library vendor.
+source "drivers/gpu/drm/clients/Kconfig"
config DRM_LOAD_EDID_FIRMWARE
bool "Allow to specify an EDID data set instead of probing for it"
@@ -166,25 +183,46 @@ config DRM_LOAD_EDID_FIRMWARE
default case is N. Details and instructions how to build your own
EDID data are given in Documentation/admin-guide/edid.rst.
-config DRM_DP_CEC
- bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support"
- depends on DRM
- select CEC_CORE
- help
- Choose this option if you want to enable HDMI CEC support for
- DisplayPort/USB-C to HDMI adapters.
-
- Note: not all adapters support this feature, and even for those
- that do support this they often do not hook up the CEC pin.
+source "drivers/gpu/drm/display/Kconfig"
config DRM_TTM
tristate
depends on DRM && MMU
+ select SHMEM
help
GPU memory management subsystem for devices with multiple
GPU memory types. Will be enabled automatically if a device driver
uses it.
+config DRM_EXEC
+ tristate
+ depends on DRM
+ help
+ Execution context for command submissions
+
+config DRM_GPUVM
+ tristate
+ depends on DRM
+ select DRM_EXEC
+ help
+ GPU-VM representation providing helpers to manage a GPUs virtual
+ address space
+
+config DRM_GPUSVM
+ tristate
+ depends on DRM && DEVICE_PRIVATE
+ select HMM_MIRROR
+ select MMU_NOTIFIER
+ help
+ GPU-SVM representation providing helpers to manage a GPUs shared
+ virtual memory
+
+config DRM_BUDDY
+ tristate
+ depends on DRM
+ help
+ A page based buddy allocator
+
config DRM_VRAM_HELPER
tristate
depends on DRM
@@ -195,103 +233,66 @@ config DRM_TTM_HELPER
tristate
depends on DRM
select DRM_TTM
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
Helpers for ttm-based gem objects
-config DRM_GEM_CMA_HELPER
- bool
- depends on DRM
- help
- Choose this if you need the GEM CMA helper functions
-
-config DRM_KMS_CMA_HELPER
- bool
+config DRM_GEM_DMA_HELPER
+ tristate
depends on DRM
- select DRM_GEM_CMA_HELPER
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_DMAMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
- Choose this if you need the KMS CMA helper functions
+ Choose this if you need the GEM DMA helper functions
config DRM_GEM_SHMEM_HELPER
- bool
- depends on DRM
+ tristate
+ depends on DRM && MMU
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
Choose this if you need the GEM shmem helper functions
+config DRM_SUBALLOC_HELPER
+ tristate
+ depends on DRM
+
config DRM_SCHED
tristate
depends on DRM
-source "drivers/gpu/drm/i2c/Kconfig"
+source "drivers/gpu/drm/sysfb/Kconfig"
source "drivers/gpu/drm/arm/Kconfig"
-config DRM_RADEON
- tristate "ATI Radeon"
- depends on DRM && PCI && MMU
- select FW_LOADER
- select DRM_KMS_HELPER
- select DRM_TTM
- select DRM_TTM_HELPER
- select POWER_SUPPLY
- select HWMON
- select BACKLIGHT_CLASS_DEVICE
- select INTERVAL_TREE
- help
- Choose this option if you have an ATI Radeon graphics card. There
- are both PCI and AGP versions. You don't need to choose this to
- run the Radeon in plain VGA mode.
-
- If M is selected, the module will be called radeon.
-
source "drivers/gpu/drm/radeon/Kconfig"
-config DRM_AMDGPU
- tristate "AMD GPU"
- depends on DRM && PCI && MMU
- select FW_LOADER
- select DRM_KMS_HELPER
- select DRM_SCHED
- select DRM_TTM
- select DRM_TTM_HELPER
- select POWER_SUPPLY
- select HWMON
- select BACKLIGHT_CLASS_DEVICE
- select INTERVAL_TREE
- select CHASH
- help
- Choose this option if you have a recent AMD Radeon graphics card.
-
- If M is selected, the module will be called amdgpu.
-
source "drivers/gpu/drm/amd/amdgpu/Kconfig"
source "drivers/gpu/drm/nouveau/Kconfig"
+source "drivers/gpu/drm/nova/Kconfig"
+
source "drivers/gpu/drm/i915/Kconfig"
+source "drivers/gpu/drm/xe/Kconfig"
+
source "drivers/gpu/drm/kmb/Kconfig"
config DRM_VGEM
tristate "Virtual GEM provider"
- depends on DRM
+ depends on DRM && MMU
+ select DRM_GEM_SHMEM_HELPER
help
Choose this option to get a virtual graphics memory manager,
as used by Mesa's software renderer for enhanced performance.
If M is selected the module will be called vgem.
-config DRM_VKMS
- tristate "Virtual KMS (EXPERIMENTAL)"
- depends on DRM
- select DRM_KMS_HELPER
- select DRM_GEM_SHMEM_HELPER
- select CRC32
- default n
- help
- Virtual Kernel Mode-Setting (VKMS) is used for testing or for
- running GPU in a headless machines. Choose this option to get
- a VKMS.
-
- If M is selected the module will be called vkms.
+source "drivers/gpu/drm/vkms/Kconfig"
source "drivers/gpu/drm/exynos/Kconfig"
@@ -311,9 +312,7 @@ source "drivers/gpu/drm/armada/Kconfig"
source "drivers/gpu/drm/atmel-hlcdc/Kconfig"
-source "drivers/gpu/drm/rcar-du/Kconfig"
-
-source "drivers/gpu/drm/shmobile/Kconfig"
+source "drivers/gpu/drm/renesas/Kconfig"
source "drivers/gpu/drm/sun4i/Kconfig"
@@ -323,8 +322,6 @@ source "drivers/gpu/drm/tilcdc/Kconfig"
source "drivers/gpu/drm/qxl/Kconfig"
-source "drivers/gpu/drm/bochs/Kconfig"
-
source "drivers/gpu/drm/virtio/Kconfig"
source "drivers/gpu/drm/msm/Kconfig"
@@ -349,15 +346,15 @@ source "drivers/gpu/drm/v3d/Kconfig"
source "drivers/gpu/drm/vc4/Kconfig"
-source "drivers/gpu/drm/etnaviv/Kconfig"
+source "drivers/gpu/drm/loongson/Kconfig"
-source "drivers/gpu/drm/arc/Kconfig"
+source "drivers/gpu/drm/etnaviv/Kconfig"
source "drivers/gpu/drm/hisilicon/Kconfig"
-source "drivers/gpu/drm/mediatek/Kconfig"
+source "drivers/gpu/drm/logicvc/Kconfig"
-source "drivers/gpu/drm/zte/Kconfig"
+source "drivers/gpu/drm/mediatek/Kconfig"
source "drivers/gpu/drm/mxsfb/Kconfig"
@@ -377,99 +374,58 @@ source "drivers/gpu/drm/lima/Kconfig"
source "drivers/gpu/drm/panfrost/Kconfig"
+source "drivers/gpu/drm/panthor/Kconfig"
+
source "drivers/gpu/drm/aspeed/Kconfig"
source "drivers/gpu/drm/mcde/Kconfig"
source "drivers/gpu/drm/tidss/Kconfig"
+source "drivers/gpu/drm/adp/Kconfig"
+
source "drivers/gpu/drm/xlnx/Kconfig"
-# Keep legacy drivers last
+source "drivers/gpu/drm/gud/Kconfig"
-menuconfig DRM_LEGACY
- bool "Enable legacy drivers (DANGEROUS)"
- depends on DRM && MMU
- help
- Enable legacy DRI1 drivers. Those drivers expose unsafe and dangerous
- APIs to user-space, which can be used to circumvent access
- restrictions and other security measures. For backwards compatibility
- those drivers are still available, but their use is highly
- inadvisable and might harm your system.
+source "drivers/gpu/drm/sitronix/Kconfig"
- You are recommended to use the safe modeset-only drivers instead, and
- perform 3D emulation in user-space.
+source "drivers/gpu/drm/solomon/Kconfig"
- Unless you have strong reasons to go rogue, say "N".
+source "drivers/gpu/drm/sprd/Kconfig"
-if DRM_LEGACY
+source "drivers/gpu/drm/imagination/Kconfig"
-config DRM_TDFX
- tristate "3dfx Banshee/Voodoo3+"
- depends on DRM && PCI
- help
- Choose this option if you have a 3dfx Banshee or Voodoo3 (or later),
- graphics card. If M is selected, the module will be called tdfx.
+source "drivers/gpu/drm/tyr/Kconfig"
-config DRM_R128
- tristate "ATI Rage 128"
- depends on DRM && PCI
- select FW_LOADER
- help
- Choose this option if you have an ATI Rage 128 graphics card. If M
- is selected, the module will be called r128. AGP support for
- this card is strongly suggested (unless you have a PCI version).
-
-config DRM_I810
- tristate "Intel I810"
- # !PREEMPTION because of missing ioctl locking
- depends on DRM && AGP && AGP_INTEL && (!PREEMPTION || BROKEN)
- help
- Choose this option if you have an Intel I810 graphics card. If M is
- selected, the module will be called i810. AGP support is required
- for this driver to work.
-
-config DRM_MGA
- tristate "Matrox g200/g400"
- depends on DRM && PCI
- select FW_LOADER
- help
- Choose this option if you have a Matrox G200, G400 or G450 graphics
- card. If M is selected, the module will be called mga. AGP
- support is required for this driver to work.
-
-config DRM_SIS
- tristate "SiS video cards"
- depends on DRM && AGP
- depends on FB_SIS || FB_SIS=n
+config DRM_HYPERV
+ tristate "DRM Support for Hyper-V synthetic video device"
+ depends on DRM && PCI && HYPERV_VMBUS
+ select DRM_CLIENT_SELECTION
+ select DRM_KMS_HELPER
+ select DRM_GEM_SHMEM_HELPER
help
- Choose this option if you have a SiS 630 or compatible video
- chipset. If M is selected the module will be called sis. AGP
- support is required for this driver to work.
+ This is a KMS driver for Hyper-V synthetic video device. Choose this
+ option if you would like to enable drm driver for Hyper-V virtual
+ machine. Unselect Hyper-V framebuffer driver (CONFIG_FB_HYPERV) so
+ that DRM driver is used by default.
-config DRM_VIA
- tristate "Via unichrome video cards"
- depends on DRM && PCI
- help
- Choose this option if you have a Via unichrome or compatible video
- chipset. If M is selected the module will be called via.
+ If M is selected the module will be called hyperv_drm.
-config DRM_SAVAGE
- tristate "Savage video cards"
- depends on DRM && PCI
- help
- Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
- chipset. If M is selected the module will be called savage.
+# Separate option as not all DRM drivers use it
+config DRM_PANEL_BACKLIGHT_QUIRKS
+ tristate
-endif # DRM_LEGACY
+config DRM_LIB_RANDOM
+ bool
+ default n
-config DRM_EXPORT_FOR_TESTS
+config DRM_PRIVACY_SCREEN
bool
+ default n
+
+endif
# Separate option because drm_panel_orientation_quirks.c is shared with fbdev
config DRM_PANEL_ORIENTATION_QUIRKS
tristate
-
-config DRM_LIB_RANDOM
- bool
- default n
diff --git a/drivers/gpu/drm/Kconfig.debug b/drivers/gpu/drm/Kconfig.debug
new file mode 100644
index 000000000000..05dc43c0b8c5
--- /dev/null
+++ b/drivers/gpu/drm/Kconfig.debug
@@ -0,0 +1,117 @@
+config DRM_USE_DYNAMIC_DEBUG
+ bool "use dynamic debug to implement drm.debug"
+ default n
+ depends on BROKEN
+ depends on DRM
+ depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
+ depends on JUMP_LABEL
+ help
+ Use dynamic-debug to avoid drm_debug_enabled() runtime overheads.
+ Due to callsite counts in DRM drivers (~4k in amdgpu) and 56
+ bytes per callsite, the .data costs can be substantial, and
+ are therefore configurable.
+
+config DRM_WERROR
+ bool "Compile the drm subsystem with warnings as errors"
+ depends on DRM && EXPERT
+ depends on !WERROR
+ default n
+ help
+ A kernel build should not cause any compiler warnings, and this
+ enables the '-Werror' flag to enforce that rule in the drm subsystem.
+
+ The drm subsystem enables more warnings than the kernel default, so
+ this config option is disabled by default.
+
+ If in doubt, say N.
+
+config DRM_HEADER_TEST
+ bool "Ensure DRM headers are self-contained and pass kernel-doc"
+ depends on DRM && EXPERT && BROKEN
+ default n
+ help
+ Ensure the DRM subsystem headers both under drivers/gpu/drm and
+ include/drm compile, are self-contained, have header guards, and have
+ no kernel-doc warnings.
+
+ If in doubt, say N.
+
+config DRM_DEBUG_MM
+ bool "Insert extra checks and debug info into the DRM range managers"
+ default n
+ depends on DRM
+ depends on STACKTRACE_SUPPORT
+ select STACKDEPOT
+ help
+ Enable allocation tracking of memory manager and leak detection on
+ shutdown.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_KUNIT_TEST_HELPERS
+ tristate
+ depends on DRM && KUNIT
+ select DRM_KMS_HELPER
+ help
+ KUnit Helpers for KMS drivers.
+
+config DRM_KUNIT_TEST
+ tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS
+ depends on DRM && KUNIT && MMU
+ select DRM_BRIDGE_CONNECTOR
+ select DRM_BUDDY
+ select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_HDMI_STATE_HELPER
+ select DRM_DISPLAY_HELPER
+ select DRM_EXEC
+ select DRM_EXPORT_FOR_TESTS if m
+ select DRM_GEM_SHMEM_HELPER
+ select DRM_KUNIT_TEST_HELPERS
+ select DRM_LIB_RANDOM
+ select DRM_SYSFB_HELPER
+ select PRIME_NUMBERS
+ default KUNIT_ALL_TESTS
+ help
+ This builds unit tests for DRM. This option is not useful for
+ distributions or general kernels, but only for kernel
+ developers working on DRM and associated drivers.
+
+ For more information on KUnit and unit tests in general,
+ please refer to the KUnit documentation in
+ Documentation/dev-tools/kunit/.
+
+ If in doubt, say "N".
+
+config DRM_TTM_KUNIT_TEST
+ tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS
+ default n
+ depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
+ select DRM_TTM
+ select DRM_BUDDY
+ select DRM_EXPORT_FOR_TESTS if m
+ select DRM_KUNIT_TEST_HELPERS
+ default KUNIT_ALL_TESTS
+ help
+ Enables unit tests for TTM, a GPU memory manager subsystem used
+ to manage memory buffers. This option is mostly useful for kernel
+ developers. It depends on (UML || COMPILE_TEST) since no other driver
+ which uses TTM can be loaded while running the tests.
+
+ If in doubt, say "N".
+
+config DRM_SCHED_KUNIT_TEST
+ tristate "KUnit tests for the DRM scheduler" if !KUNIT_ALL_TESTS
+ select DRM_SCHED
+ depends on DRM && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Choose this option to build unit tests for the DRM scheduler.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_EXPORT_FOR_TESTS
+ bool
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 926adef289db..0e1c668b46d2 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -3,85 +3,188 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-drm-y := drm_auth.o drm_cache.o \
- drm_file.o drm_gem.o drm_ioctl.o drm_irq.o \
- drm_drv.o \
- drm_sysfs.o drm_hashtab.o drm_mm.o \
- drm_crtc.o drm_fourcc.o drm_modes.o drm_edid.o \
- drm_encoder_slave.o \
- drm_trace_points.o drm_prime.o \
- drm_rect.o drm_vma_manager.o drm_flip_work.o \
- drm_modeset_lock.o drm_atomic.o drm_bridge.o \
- drm_framebuffer.o drm_connector.o drm_blend.o \
- drm_encoder.o drm_mode_object.o drm_property.o \
- drm_plane.o drm_color_mgmt.o drm_print.o \
- drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
- drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
- drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
- drm_managed.o drm_vblank_work.o
-
-drm-$(CONFIG_DRM_LEGACY) += drm_bufs.o drm_context.o drm_dma.o drm_legacy_misc.o drm_lock.o \
- drm_memory.o drm_scatter.o drm_vm.o
+CFLAGS-$(CONFIG_DRM_USE_DYNAMIC_DEBUG) += -DDYNAMIC_DEBUG_MODULE
+
+# Unconditionally enable W=1 warnings locally
+# --- begin copy-paste W=1 warnings from scripts/Makefile.warn
+subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter
+subdir-ccflags-y += $(call cc-option, -Wrestrict)
+subdir-ccflags-y += -Wmissing-format-attribute
+subdir-ccflags-y += -Wold-style-definition
+subdir-ccflags-y += -Wmissing-include-dirs
+subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
+subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
+subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
+subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
+# FIXME: fix -Wformat-truncation warnings and uncomment
+#subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
+subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
+# The following turn off the warnings enabled by -Wextra
+ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-missing-field-initializers
+subdir-ccflags-y += -Wno-type-limits
+subdir-ccflags-y += -Wno-shift-negative-value
+endif
+ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-sign-compare
+endif
+# --- end copy-paste
+
+# Enable -Werror in CI and development
+subdir-ccflags-$(CONFIG_DRM_WERROR) += -Werror
+
+drm-y := \
+ drm_atomic.o \
+ drm_atomic_uapi.o \
+ drm_auth.o \
+ drm_blend.o \
+ drm_bridge.o \
+ drm_cache.o \
+ drm_color_mgmt.o \
+ drm_colorop.o \
+ drm_connector.o \
+ drm_crtc.o \
+ drm_displayid.o \
+ drm_drv.o \
+ drm_dumb_buffers.o \
+ drm_edid.o \
+ drm_eld.o \
+ drm_encoder.o \
+ drm_file.o \
+ drm_fourcc.o \
+ drm_framebuffer.o \
+ drm_gem.o \
+ drm_ioctl.o \
+ drm_lease.o \
+ drm_managed.o \
+ drm_mm.o \
+ drm_mode_config.o \
+ drm_mode_object.o \
+ drm_modes.o \
+ drm_modeset_lock.o \
+ drm_plane.o \
+ drm_prime.o \
+ drm_print.o \
+ drm_property.o \
+ drm_rect.o \
+ drm_syncobj.o \
+ drm_sysfs.o \
+ drm_trace_points.o \
+ drm_vblank.o \
+ drm_vblank_work.o \
+ drm_vma_manager.o \
+ drm_writeback.o
+drm-$(CONFIG_DRM_CLIENT) += \
+ drm_client.o \
+ drm_client_event.o \
+ drm_client_modeset.o \
+ drm_client_sysrq.o
drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
drm-$(CONFIG_COMPAT) += drm_ioc32.o
-drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o
-drm-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_gem_shmem_helper.o
drm-$(CONFIG_DRM_PANEL) += drm_panel.o
drm-$(CONFIG_OF) += drm_of.o
-drm-$(CONFIG_AGP) += drm_agpsupport.o
drm-$(CONFIG_PCI) += drm_pci.o
-drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
+drm-$(CONFIG_DEBUG_FS) += \
+ drm_debugfs.o \
+ drm_debugfs_crc.o
drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
+drm-$(CONFIG_DRM_PRIVACY_SCREEN) += \
+ drm_privacy_screen.o \
+ drm_privacy_screen_x86.o
+drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o
+drm-$(CONFIG_DRM_PANIC) += drm_panic.o
+drm-$(CONFIG_DRM_DRAW) += drm_draw.o
+drm-$(CONFIG_DRM_PANIC_SCREEN_QR_CODE) += drm_panic_qr.o
+obj-$(CONFIG_DRM) += drm.o
+
+obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
+obj-$(CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS) += drm_panel_backlight_quirks.o
+
+#
+# Memory-management helpers
+#
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
+obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
+
+drm_gpusvm_helper-y := \
+ drm_gpusvm.o\
+ drm_pagemap.o
+obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
+
+obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
+
+drm_dma_helper-y := drm_gem_dma_helper.o
+drm_dma_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_dma.o
+drm_dma_helper-$(CONFIG_DRM_KMS_HELPER) += drm_fb_dma_helper.o
+obj-$(CONFIG_DRM_GEM_DMA_HELPER) += drm_dma_helper.o
+
+drm_shmem_helper-y := drm_gem_shmem_helper.o
+drm_shmem_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_shmem.o
+obj-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_shmem_helper.o
+
+drm_suballoc_helper-y := drm_suballoc.o
+obj-$(CONFIG_DRM_SUBALLOC_HELPER) += drm_suballoc_helper.o
drm_vram_helper-y := drm_gem_vram_helper.o
obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
drm_ttm_helper-y := drm_gem_ttm_helper.o
+drm_ttm_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_ttm.o
obj-$(CONFIG_DRM_TTM_HELPER) += drm_ttm_helper.o
-drm_kms_helper-y := drm_bridge_connector.o drm_crtc_helper.o drm_dp_helper.o \
- drm_dsc.o drm_probe_helper.o \
- drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
- drm_kms_helper_common.o drm_dp_dual_mode_helper.o \
- drm_simple_kms_helper.o drm_modeset_helper.o \
- drm_scdc_helper.o drm_gem_framebuffer_helper.o \
- drm_atomic_state_helper.o drm_damage_helper.o \
- drm_format_helper.o drm_self_refresh_helper.o
+#
+# Modesetting helpers
+#
+drm_kms_helper-y := \
+ drm_atomic_helper.o \
+ drm_atomic_state_helper.o \
+ drm_bridge_helper.o \
+ drm_crtc_helper.o \
+ drm_damage_helper.o \
+ drm_flip_work.o \
+ drm_format_helper.o \
+ drm_gem_atomic_helper.o \
+ drm_gem_framebuffer_helper.o \
+ drm_kms_helper_common.o \
+ drm_modeset_helper.o \
+ drm_plane_helper.o \
+ drm_probe_helper.o \
+ drm_self_refresh_helper.o \
+ drm_simple_kms_helper.o \
+ drm_vblank_helper.o
drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
-drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
-drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
-drm_kms_helper-$(CONFIG_DRM_DP_CEC) += drm_dp_cec.o
-
obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
-obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
-obj-$(CONFIG_DRM) += drm.o
+#
+# Drivers and the rest
+#
+
+obj-y += tests/
+
obj-$(CONFIG_DRM_MIPI_DBI) += drm_mipi_dbi.o
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
-obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
obj-y += arm/
+obj-y += clients/
+obj-y += display/
obj-$(CONFIG_DRM_TTM) += ttm/
obj-$(CONFIG_DRM_SCHED) += scheduler/
-obj-$(CONFIG_DRM_TDFX) += tdfx/
-obj-$(CONFIG_DRM_R128) += r128/
obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
-obj-$(CONFIG_DRM_MGA) += mga/
-obj-$(CONFIG_DRM_I810) += i810/
+obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
obj-$(CONFIG_DRM_I915) += i915/
+obj-$(CONFIG_DRM_XE) += xe/
obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_V3D) += v3d/
obj-$(CONFIG_DRM_VC4) += vc4/
-obj-$(CONFIG_DRM_SIS) += sis/
-obj-$(CONFIG_DRM_SAVAGE)+= savage/
obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
-obj-$(CONFIG_DRM_VIA) +=via/
obj-$(CONFIG_DRM_VGEM) += vgem/
obj-$(CONFIG_DRM_VKMS) += vkms/
obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/
+obj-$(CONFIG_DRM_NOVA) += nova/
obj-$(CONFIG_DRM_EXYNOS) +=exynos/
obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/
obj-$(CONFIG_DRM_GMA500) += gma500/
@@ -89,13 +192,11 @@ obj-$(CONFIG_DRM_UDL) += udl/
obj-$(CONFIG_DRM_AST) += ast/
obj-$(CONFIG_DRM_ARMADA) += armada/
obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/
-obj-y += rcar-du/
-obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
+obj-y += renesas/
obj-y += omapdrm/
obj-$(CONFIG_DRM_SUN4I) += sun4i/
obj-y += tilcdc/
obj-$(CONFIG_DRM_QXL) += qxl/
-obj-$(CONFIG_DRM_BOCHS) += bochs/
obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/
obj-$(CONFIG_DRM_MSM) += msm/
obj-$(CONFIG_DRM_TEGRA) += tegra/
@@ -103,25 +204,52 @@ obj-$(CONFIG_DRM_STM) += stm/
obj-$(CONFIG_DRM_STI) += sti/
obj-y += imx/
obj-$(CONFIG_DRM_INGENIC) += ingenic/
+obj-$(CONFIG_DRM_LOGICVC) += logicvc/
obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
obj-$(CONFIG_DRM_MESON) += meson/
-obj-y += i2c/
obj-y += panel/
obj-y += bridge/
obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/
obj-$(CONFIG_DRM_ETNAVIV) += etnaviv/
-obj-$(CONFIG_DRM_ARCPGU)+= arc/
obj-y += hisilicon/
-obj-$(CONFIG_DRM_ZTE) += zte/
-obj-$(CONFIG_DRM_MXSFB) += mxsfb/
+obj-y += mxsfb/
+obj-y += sysfb/
obj-y += tiny/
obj-$(CONFIG_DRM_PL111) += pl111/
obj-$(CONFIG_DRM_TVE200) += tve200/
+obj-$(CONFIG_DRM_ADP) += adp/
obj-$(CONFIG_DRM_XEN) += xen/
obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
obj-$(CONFIG_DRM_LIMA) += lima/
obj-$(CONFIG_DRM_PANFROST) += panfrost/
+obj-$(CONFIG_DRM_PANTHOR) += panthor/
+obj-$(CONFIG_DRM_TYR) += tyr/
obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
obj-$(CONFIG_DRM_MCDE) += mcde/
obj-$(CONFIG_DRM_TIDSS) += tidss/
obj-y += xlnx/
+obj-y += gud/
+obj-$(CONFIG_DRM_HYPERV) += hyperv/
+obj-y += sitronix/
+obj-y += solomon/
+obj-$(CONFIG_DRM_SPRD) += sprd/
+obj-$(CONFIG_DRM_LOONGSON) += loongson/
+obj-$(CONFIG_DRM_POWERVR) += imagination/
+
+# Ensure drm headers are self-contained and pass kernel-doc
+hdrtest-files := \
+ $(shell cd $(src) && find . -maxdepth 1 -name 'drm_*.h') \
+ $(shell cd $(src) && find display lib -name '*.h')
+
+always-$(CONFIG_DRM_HEADER_TEST) += \
+ $(patsubst %.h,%.hdrtest, $(hdrtest-files))
+
+# Include the header twice to detect missing include guard.
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+ cmd_hdrtest = \
+ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \
+ PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
+ touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+ $(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/adp/Kconfig b/drivers/gpu/drm/adp/Kconfig
new file mode 100644
index 000000000000..9fcc27eb200d
--- /dev/null
+++ b/drivers/gpu/drm/adp/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+config DRM_ADP
+ tristate "DRM Support for pre-DCP Apple display controllers"
+ depends on DRM && OF && ARM64
+ depends on ARCH_APPLE || COMPILE_TEST
+ select DRM_KMS_HELPER
+ select DRM_BRIDGE_CONNECTOR
+ select DRM_DISPLAY_HELPER
+ select DRM_KMS_DMA_HELPER
+ select DRM_GEM_DMA_HELPER
+ select DRM_PANEL_BRIDGE
+ select VIDEOMODE_HELPERS
+ select DRM_MIPI_DSI
+ help
+ Chose this option if you have an Apple Arm laptop with a touchbar.
+
+ If M is selected, this module will be called adpdrm.
diff --git a/drivers/gpu/drm/adp/Makefile b/drivers/gpu/drm/adp/Makefile
new file mode 100644
index 000000000000..8e7b618edd35
--- /dev/null
+++ b/drivers/gpu/drm/adp/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+adpdrm-y := adp_drv.o
+adpdrm-mipi-y := adp-mipi.o
+obj-$(CONFIG_DRM_ADP) += adpdrm.o adpdrm-mipi.o
diff --git a/drivers/gpu/drm/adp/adp-mipi.c b/drivers/gpu/drm/adp/adp-mipi.c
new file mode 100644
index 000000000000..cba7d32150a9
--- /dev/null
+++ b/drivers/gpu/drm/adp/adp-mipi.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/component.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_bridge.h>
+#include <drm/drm_mipi_dsi.h>
+
+#define DSI_GEN_HDR 0x6c
+#define DSI_GEN_PLD_DATA 0x70
+
+#define DSI_CMD_PKT_STATUS 0x74
+
+#define GEN_PLD_R_EMPTY BIT(4)
+#define GEN_PLD_W_FULL BIT(3)
+#define GEN_PLD_W_EMPTY BIT(2)
+#define GEN_CMD_FULL BIT(1)
+#define GEN_CMD_EMPTY BIT(0)
+#define GEN_RD_CMD_BUSY BIT(6)
+#define CMD_PKT_STATUS_TIMEOUT_US 20000
+
+struct adp_mipi_drv_private {
+ struct mipi_dsi_host dsi;
+ struct drm_bridge bridge;
+ struct drm_bridge *next_bridge;
+ void __iomem *mipi;
+};
+
+#define mipi_to_adp(x) container_of(x, struct adp_mipi_drv_private, dsi)
+
+static int adp_dsi_gen_pkt_hdr_write(struct adp_mipi_drv_private *adp, u32 hdr_val)
+{
+ int ret;
+ u32 val, mask;
+
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_CMD_FULL), 1000,
+ CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to get available command FIFO\n");
+ return ret;
+ }
+
+ writel(hdr_val, adp->mipi + DSI_GEN_HDR);
+
+ mask = GEN_CMD_EMPTY | GEN_PLD_W_EMPTY;
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, (val & mask) == mask,
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to write command FIFO\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adp_dsi_write(struct adp_mipi_drv_private *adp,
+ const struct mipi_dsi_packet *packet)
+{
+ const u8 *tx_buf = packet->payload;
+ int len = packet->payload_length, pld_data_bytes = sizeof(u32), ret;
+ __le32 word;
+ u32 val;
+
+ while (len) {
+ if (len < pld_data_bytes) {
+ word = 0;
+ memcpy(&word, tx_buf, len);
+ writel(le32_to_cpu(word), adp->mipi + DSI_GEN_PLD_DATA);
+ len = 0;
+ } else {
+ memcpy(&word, tx_buf, pld_data_bytes);
+ writel(le32_to_cpu(word), adp->mipi + DSI_GEN_PLD_DATA);
+ tx_buf += pld_data_bytes;
+ len -= pld_data_bytes;
+ }
+
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_PLD_W_FULL), 1000,
+ CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev,
+ "failed to get available write payload FIFO\n");
+ return ret;
+ }
+ }
+
+ word = 0;
+ memcpy(&word, packet->header, sizeof(packet->header));
+ return adp_dsi_gen_pkt_hdr_write(adp, le32_to_cpu(word));
+}
+
+static int adp_dsi_read(struct adp_mipi_drv_private *adp,
+ const struct mipi_dsi_msg *msg)
+{
+ int i, j, ret, len = msg->rx_len;
+ u8 *buf = msg->rx_buf;
+ u32 val;
+
+ /* Wait end of the read operation */
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_RD_CMD_BUSY),
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "Timeout during read operation\n");
+ return ret;
+ }
+
+ for (i = 0; i < len; i += 4) {
+ /* Read fifo must not be empty before all bytes are read */
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_PLD_R_EMPTY),
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "Read payload FIFO is empty\n");
+ return ret;
+ }
+
+ val = readl(adp->mipi + DSI_GEN_PLD_DATA);
+ for (j = 0; j < 4 && j + i < len; j++)
+ buf[i + j] = val >> (8 * j);
+ }
+
+ return ret;
+}
+
+static ssize_t adp_dsi_host_transfer(struct mipi_dsi_host *host,
+ const struct mipi_dsi_msg *msg)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+ struct mipi_dsi_packet packet;
+ int ret, nb_bytes;
+
+ ret = mipi_dsi_create_packet(&packet, msg);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to create packet: %d\n", ret);
+ return ret;
+ }
+
+ ret = adp_dsi_write(adp, &packet);
+ if (ret)
+ return ret;
+
+ if (msg->rx_buf && msg->rx_len) {
+ ret = adp_dsi_read(adp, msg);
+ if (ret)
+ return ret;
+ nb_bytes = msg->rx_len;
+ } else {
+ nb_bytes = packet.size;
+ }
+
+ return nb_bytes;
+}
+
+static int adp_dsi_bind(struct device *dev, struct device *master, void *data)
+{
+ return 0;
+}
+
+static void adp_dsi_unbind(struct device *dev, struct device *master, void *data)
+{
+}
+
+static const struct component_ops adp_dsi_component_ops = {
+ .bind = adp_dsi_bind,
+ .unbind = adp_dsi_unbind,
+};
+
+static int adp_dsi_host_attach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *dev)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+ struct drm_bridge *next;
+ int ret;
+
+ next = devm_drm_of_get_bridge(adp->dsi.dev, adp->dsi.dev->of_node, 1, 0);
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+
+ adp->next_bridge = next;
+
+ drm_bridge_add(&adp->bridge);
+
+ ret = component_add(host->dev, &adp_dsi_component_ops);
+ if (ret) {
+ pr_err("failed to add dsi_host component: %d\n", ret);
+ drm_bridge_remove(&adp->bridge);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adp_dsi_host_detach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *dev)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+
+ component_del(host->dev, &adp_dsi_component_ops);
+ drm_bridge_remove(&adp->bridge);
+ return 0;
+}
+
+static const struct mipi_dsi_host_ops adp_dsi_host_ops = {
+ .transfer = adp_dsi_host_transfer,
+ .attach = adp_dsi_host_attach,
+ .detach = adp_dsi_host_detach,
+};
+
+static int adp_dsi_bridge_attach(struct drm_bridge *bridge,
+ struct drm_encoder *encoder,
+ enum drm_bridge_attach_flags flags)
+{
+ struct adp_mipi_drv_private *adp =
+ container_of(bridge, struct adp_mipi_drv_private, bridge);
+
+ return drm_bridge_attach(encoder, adp->next_bridge, bridge, flags);
+}
+
+static const struct drm_bridge_funcs adp_dsi_bridge_funcs = {
+ .attach = adp_dsi_bridge_attach,
+};
+
+static int adp_mipi_probe(struct platform_device *pdev)
+{
+ struct adp_mipi_drv_private *adp;
+
+ adp = devm_drm_bridge_alloc(&pdev->dev, struct adp_mipi_drv_private,
+ bridge, &adp_dsi_bridge_funcs);
+ if (IS_ERR(adp))
+ return PTR_ERR(adp);
+
+ adp->mipi = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(adp->mipi)) {
+ dev_err(&pdev->dev, "failed to map mipi mmio");
+ return PTR_ERR(adp->mipi);
+ }
+
+ adp->dsi.dev = &pdev->dev;
+ adp->dsi.ops = &adp_dsi_host_ops;
+ adp->bridge.of_node = pdev->dev.of_node;
+ adp->bridge.type = DRM_MODE_CONNECTOR_DSI;
+ dev_set_drvdata(&pdev->dev, adp);
+ return mipi_dsi_host_register(&adp->dsi);
+}
+
+static void adp_mipi_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct adp_mipi_drv_private *adp = dev_get_drvdata(dev);
+
+ mipi_dsi_host_unregister(&adp->dsi);
+}
+
+static const struct of_device_id adp_mipi_of_match[] = {
+ { .compatible = "apple,h7-display-pipe-mipi", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, adp_mipi_of_match);
+
+static struct platform_driver adp_mipi_platform_driver = {
+ .driver = {
+ .name = "adp-mipi",
+ .of_match_table = adp_mipi_of_match,
+ },
+ .probe = adp_mipi_probe,
+ .remove = adp_mipi_remove,
+};
+
+module_platform_driver(adp_mipi_platform_driver);
+
+MODULE_DESCRIPTION("Apple Display Pipe MIPI driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c
new file mode 100644
index 000000000000..4554cf75565e
--- /dev/null
+++ b/drivers/gpu/drm/adp/adp_drv.c
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/component.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_bridge_connector.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_dma_helper.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_atomic_helper.h>
+#include <drm/drm_gem_dma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_of.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+
+#define ADP_INT_STATUS 0x34
+#define ADP_INT_STATUS_INT_MASK 0x7
+#define ADP_INT_STATUS_VBLANK 0x1
+#define ADP_CTRL 0x100
+#define ADP_CTRL_VBLANK_ON 0x12
+#define ADP_CTRL_FIFO_ON 0x601
+#define ADP_SCREEN_SIZE 0x0c
+#define ADP_SCREEN_HSIZE GENMASK(15, 0)
+#define ADP_SCREEN_VSIZE GENMASK(31, 16)
+
+#define ADBE_FIFO 0x10c0
+#define ADBE_FIFO_SYNC 0xc0000000
+
+#define ADBE_BLEND_BYPASS 0x2020
+#define ADBE_BLEND_EN1 0x2028
+#define ADBE_BLEND_EN2 0x2074
+#define ADBE_BLEND_EN3 0x202c
+#define ADBE_BLEND_EN4 0x2034
+#define ADBE_MASK_BUF 0x2200
+
+#define ADBE_SRC_START 0x4040
+#define ADBE_SRC_SIZE 0x4048
+#define ADBE_DST_START 0x4050
+#define ADBE_DST_SIZE 0x4054
+#define ADBE_STRIDE 0x4038
+#define ADBE_FB_BASE 0x4030
+
+#define ADBE_LAYER_EN1 0x4020
+#define ADBE_LAYER_EN2 0x4068
+#define ADBE_LAYER_EN3 0x40b4
+#define ADBE_LAYER_EN4 0x40f4
+#define ADBE_SCALE_CTL 0x40ac
+#define ADBE_SCALE_CTL_BYPASS 0x100000
+
+#define ADBE_LAYER_CTL 0x1038
+#define ADBE_LAYER_CTL_ENABLE 0x10000
+
+#define ADBE_PIX_FMT 0x402c
+#define ADBE_PIX_FMT_XRGB32 0x53e4001
+
+static int adp_open(struct inode *inode, struct file *filp)
+{
+ /*
+ * The modesetting driver does not check the non-desktop connector
+ * property and keeps the device open and locked. If the touchbar daemon
+ * opens the device first, modesetting breaks the whole X session.
+ * Simply refuse to open the device for X11 server processes as
+ * workaround.
+ */
+ if (current->comm[0] == 'X')
+ return -EBUSY;
+
+ return drm_open(inode, filp);
+}
+
+static const struct file_operations adp_fops = {
+ .owner = THIS_MODULE,
+ .open = adp_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .compat_ioctl = drm_compat_ioctl,
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = noop_llseek,
+ .mmap = drm_gem_mmap,
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+ DRM_GEM_DMA_UNMAPPED_AREA_FOPS
+};
+
+static int adp_drm_gem_dumb_create(struct drm_file *file_priv,
+ struct drm_device *drm,
+ struct drm_mode_create_dumb *args)
+{
+ args->height = ALIGN(args->height, 64);
+ args->size = args->pitch * args->height;
+
+ return drm_gem_dma_dumb_create_internal(file_priv, drm, args);
+}
+
+static const struct drm_driver adp_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
+ .fops = &adp_fops,
+ DRM_GEM_DMA_DRIVER_OPS_VMAP_WITH_DUMB_CREATE(adp_drm_gem_dumb_create),
+ .name = "adp",
+ .desc = "Apple Display Pipe DRM Driver",
+ .major = 0,
+ .minor = 1,
+};
+
+struct adp_drv_private {
+ struct drm_device drm;
+ struct drm_crtc crtc;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ struct drm_bridge *next_bridge;
+ void __iomem *be;
+ void __iomem *fe;
+ u32 *mask_buf;
+ u64 mask_buf_size;
+ dma_addr_t mask_iova;
+ int be_irq;
+ int fe_irq;
+ struct drm_pending_vblank_event *event;
+};
+
+#define to_adp(x) container_of(x, struct adp_drv_private, drm)
+#define crtc_to_adp(x) container_of(x, struct adp_drv_private, crtc)
+
+static int adp_plane_atomic_check(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_plane_state;
+ struct drm_crtc_state *crtc_state;
+
+ new_plane_state = drm_atomic_get_new_plane_state(state, plane);
+
+ if (!new_plane_state->crtc)
+ return 0;
+
+ crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ return drm_atomic_helper_check_plane_state(new_plane_state,
+ crtc_state,
+ DRM_PLANE_NO_SCALING,
+ DRM_PLANE_NO_SCALING,
+ true, true);
+}
+
+static void adp_plane_atomic_update(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp;
+ struct drm_rect src_rect;
+ struct drm_gem_dma_object *obj;
+ struct drm_framebuffer *fb;
+ struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+ u32 src_pos, src_size, dst_pos, dst_size;
+
+ if (!plane || !new_state)
+ return;
+
+ fb = new_state->fb;
+ if (!fb)
+ return;
+ adp = to_adp(plane->dev);
+
+ drm_rect_fp_to_int(&src_rect, &new_state->src);
+ src_pos = src_rect.x1 << 16 | src_rect.y1;
+ dst_pos = new_state->dst.x1 << 16 | new_state->dst.y1;
+ src_size = drm_rect_width(&src_rect) << 16 | drm_rect_height(&src_rect);
+ dst_size = drm_rect_width(&new_state->dst) << 16 |
+ drm_rect_height(&new_state->dst);
+ writel(src_pos, adp->be + ADBE_SRC_START);
+ writel(src_size, adp->be + ADBE_SRC_SIZE);
+ writel(dst_pos, adp->be + ADBE_DST_START);
+ writel(dst_size, adp->be + ADBE_DST_SIZE);
+ writel(fb->pitches[0], adp->be + ADBE_STRIDE);
+ obj = drm_fb_dma_get_gem_obj(fb, 0);
+ if (obj)
+ writel(obj->dma_addr + fb->offsets[0], adp->be + ADBE_FB_BASE);
+
+ writel(BIT(0), adp->be + ADBE_LAYER_EN1);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN2);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN3);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN4);
+ writel(ADBE_SCALE_CTL_BYPASS, adp->be + ADBE_SCALE_CTL);
+ writel(ADBE_LAYER_CTL_ENABLE | BIT(0), adp->be + ADBE_LAYER_CTL);
+ writel(ADBE_PIX_FMT_XRGB32, adp->be + ADBE_PIX_FMT);
+}
+
+static void adp_plane_atomic_disable(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = to_adp(plane->dev);
+
+ writel(0x0, adp->be + ADBE_LAYER_EN1);
+ writel(0x0, adp->be + ADBE_LAYER_EN2);
+ writel(0x0, adp->be + ADBE_LAYER_EN3);
+ writel(0x0, adp->be + ADBE_LAYER_EN4);
+ writel(ADBE_LAYER_CTL_ENABLE, adp->be + ADBE_LAYER_CTL);
+}
+
+static const struct drm_plane_helper_funcs adp_plane_helper_funcs = {
+ .atomic_check = adp_plane_atomic_check,
+ .atomic_update = adp_plane_atomic_update,
+ .atomic_disable = adp_plane_atomic_disable,
+ DRM_GEM_SHADOW_PLANE_HELPER_FUNCS
+};
+
+static const struct drm_plane_funcs adp_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ DRM_GEM_SHADOW_PLANE_FUNCS
+};
+
+static const u32 plane_formats[] = {
+ DRM_FORMAT_XRGB8888,
+};
+
+#define ALL_CRTCS 1
+
+static struct drm_plane *adp_plane_new(struct adp_drv_private *adp)
+{
+ struct drm_device *drm = &adp->drm;
+ struct drm_plane *plane;
+
+ plane = __drmm_universal_plane_alloc(drm, sizeof(struct drm_plane), 0,
+ ALL_CRTCS, &adp_plane_funcs,
+ plane_formats, ARRAY_SIZE(plane_formats),
+ NULL, DRM_PLANE_TYPE_PRIMARY, "plane");
+ if (IS_ERR(plane)) {
+ drm_err(drm, "failed to allocate plane");
+ return plane;
+ }
+
+ drm_plane_helper_add(plane, &adp_plane_helper_funcs);
+ return plane;
+}
+
+static void adp_enable_vblank(struct adp_drv_private *adp)
+{
+ u32 cur_ctrl;
+
+ writel(ADP_INT_STATUS_INT_MASK, adp->fe + ADP_INT_STATUS);
+
+ cur_ctrl = readl(adp->fe + ADP_CTRL);
+ writel(cur_ctrl | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL);
+}
+
+static int adp_crtc_enable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct adp_drv_private *adp = to_adp(dev);
+
+ adp_enable_vblank(adp);
+
+ return 0;
+}
+
+static void adp_disable_vblank(struct adp_drv_private *adp)
+{
+ u32 cur_ctrl;
+
+ cur_ctrl = readl(adp->fe + ADP_CTRL);
+ writel(cur_ctrl & ~ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL);
+ writel(ADP_INT_STATUS_INT_MASK, adp->fe + ADP_INT_STATUS);
+}
+
+static void adp_crtc_disable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct adp_drv_private *adp = to_adp(dev);
+
+ adp_disable_vblank(adp);
+}
+
+static void adp_crtc_atomic_enable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+
+ writel(BIT(0), adp->be + ADBE_BLEND_EN2);
+ writel(BIT(4), adp->be + ADBE_BLEND_EN1);
+ writel(BIT(0), adp->be + ADBE_BLEND_EN3);
+ writel(BIT(0), adp->be + ADBE_BLEND_BYPASS);
+ writel(BIT(0), adp->be + ADBE_BLEND_EN4);
+ drm_crtc_vblank_on(crtc);
+}
+
+static void adp_crtc_atomic_disable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+ struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state, crtc);
+
+ drm_atomic_helper_disable_planes_on_crtc(old_state, false);
+
+ writel(0x0, adp->be + ADBE_BLEND_EN2);
+ writel(0x0, adp->be + ADBE_BLEND_EN1);
+ writel(0x0, adp->be + ADBE_BLEND_EN3);
+ writel(0x0, adp->be + ADBE_BLEND_BYPASS);
+ writel(0x0, adp->be + ADBE_BLEND_EN4);
+ drm_crtc_vblank_off(crtc);
+}
+
+static void adp_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ u32 frame_num = 1;
+ unsigned long flags;
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+ struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc);
+ u64 new_size = ALIGN(new_state->mode.hdisplay *
+ new_state->mode.vdisplay * 4, PAGE_SIZE);
+
+ if (new_size != adp->mask_buf_size) {
+ if (adp->mask_buf)
+ dma_free_coherent(crtc->dev->dev, adp->mask_buf_size,
+ adp->mask_buf, adp->mask_iova);
+ adp->mask_buf = NULL;
+ if (new_size != 0) {
+ adp->mask_buf = dma_alloc_coherent(crtc->dev->dev, new_size,
+ &adp->mask_iova, GFP_KERNEL);
+ memset(adp->mask_buf, 0xFF, new_size);
+ writel(adp->mask_iova, adp->be + ADBE_MASK_BUF);
+ }
+ adp->mask_buf_size = new_size;
+ }
+ writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO);
+ //FIXME: use adbe flush interrupt
+ if (crtc->state->event) {
+ struct drm_pending_vblank_event *event = crtc->state->event;
+
+ crtc->state->event = NULL;
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ if (drm_crtc_vblank_get(crtc) != 0)
+ drm_crtc_send_vblank_event(crtc, event);
+ else
+ adp->event = event;
+
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ }
+}
+
+static const struct drm_crtc_funcs adp_crtc_funcs = {
+ .destroy = drm_crtc_cleanup,
+ .set_config = drm_atomic_helper_set_config,
+ .page_flip = drm_atomic_helper_page_flip,
+ .reset = drm_atomic_helper_crtc_reset,
+ .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+ .enable_vblank = adp_crtc_enable_vblank,
+ .disable_vblank = adp_crtc_disable_vblank,
+};
+
+
+static const struct drm_crtc_helper_funcs adp_crtc_helper_funcs = {
+ .atomic_enable = adp_crtc_atomic_enable,
+ .atomic_disable = adp_crtc_atomic_disable,
+ .atomic_flush = adp_crtc_atomic_flush,
+};
+
+static int adp_setup_crtc(struct adp_drv_private *adp)
+{
+ struct drm_device *drm = &adp->drm;
+ struct drm_plane *primary;
+ int ret;
+
+ primary = adp_plane_new(adp);
+ if (IS_ERR(primary))
+ return PTR_ERR(primary);
+
+ ret = drm_crtc_init_with_planes(drm, &adp->crtc, primary,
+ NULL, &adp_crtc_funcs, NULL);
+ if (ret)
+ return ret;
+
+ drm_crtc_helper_add(&adp->crtc, &adp_crtc_helper_funcs);
+ return 0;
+}
+
+static const struct drm_mode_config_funcs adp_mode_config_funcs = {
+ .fb_create = drm_gem_fb_create_with_dirty,
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = drm_atomic_helper_commit,
+};
+
+static int adp_setup_mode_config(struct adp_drv_private *adp)
+{
+ struct drm_device *drm = &adp->drm;
+ int ret;
+ u32 size;
+
+ ret = drmm_mode_config_init(drm);
+ if (ret)
+ return ret;
+
+ /*
+ * Query screen size restrict the frame buffer size to the screen size
+ * aligned to the next multiple of 64. This is not necessary but can be
+ * used as simple check for non-desktop devices.
+ * Xorg's modesetting driver does not care about the connector
+ * "non-desktop" property. The max frame buffer width or height can be
+ * easily checked and a device can be reject if the max width/height is
+ * smaller than 120 for example.
+ * Any touchbar daemon is not limited by this small framebuffer size.
+ */
+ size = readl(adp->fe + ADP_SCREEN_SIZE);
+
+ drm->mode_config.min_width = 32;
+ drm->mode_config.min_height = 32;
+ drm->mode_config.max_width = ALIGN(FIELD_GET(ADP_SCREEN_HSIZE, size), 64);
+ drm->mode_config.max_height = ALIGN(FIELD_GET(ADP_SCREEN_VSIZE, size), 64);
+ drm->mode_config.preferred_depth = 24;
+ drm->mode_config.prefer_shadow = 0;
+ drm->mode_config.funcs = &adp_mode_config_funcs;
+
+ ret = adp_setup_crtc(adp);
+ if (ret) {
+ drm_err(drm, "failed to create crtc");
+ return ret;
+ }
+
+ adp->encoder = drmm_plain_encoder_alloc(drm, NULL, DRM_MODE_ENCODER_DSI, NULL);
+ if (IS_ERR(adp->encoder)) {
+ drm_err(drm, "failed to init encoder");
+ return PTR_ERR(adp->encoder);
+ }
+ adp->encoder->possible_crtcs = ALL_CRTCS;
+
+ ret = drm_bridge_attach(adp->encoder, adp->next_bridge, NULL,
+ DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+ if (ret) {
+ drm_err(drm, "failed to init bridge chain");
+ return ret;
+ }
+
+ adp->connector = drm_bridge_connector_init(drm, adp->encoder);
+ if (IS_ERR(adp->connector))
+ return PTR_ERR(adp->connector);
+
+ drm_connector_attach_encoder(adp->connector, adp->encoder);
+
+ ret = drm_vblank_init(drm, drm->mode_config.num_crtc);
+ if (ret < 0) {
+ drm_err(drm, "failed to initialize vblank");
+ return ret;
+ }
+
+ drm_mode_config_reset(drm);
+
+ return 0;
+}
+
+static int adp_parse_of(struct platform_device *pdev, struct adp_drv_private *adp)
+{
+ struct device *dev = &pdev->dev;
+
+ adp->be = devm_platform_ioremap_resource_byname(pdev, "be");
+ if (IS_ERR(adp->be)) {
+ dev_err(dev, "failed to map display backend mmio");
+ return PTR_ERR(adp->be);
+ }
+
+ adp->fe = devm_platform_ioremap_resource_byname(pdev, "fe");
+ if (IS_ERR(adp->fe)) {
+ dev_err(dev, "failed to map display pipe mmio");
+ return PTR_ERR(adp->fe);
+ }
+
+ adp->be_irq = platform_get_irq_byname(pdev, "be");
+ if (adp->be_irq < 0)
+ return adp->be_irq;
+
+ adp->fe_irq = platform_get_irq_byname(pdev, "fe");
+ if (adp->fe_irq < 0)
+ return adp->fe_irq;
+
+ return 0;
+}
+
+static irqreturn_t adp_fe_irq(int irq, void *arg)
+{
+ struct adp_drv_private *adp = (struct adp_drv_private *)arg;
+ u32 int_status;
+ u32 int_ctl;
+
+ int_status = readl(adp->fe + ADP_INT_STATUS);
+ if (int_status & ADP_INT_STATUS_VBLANK) {
+ drm_crtc_handle_vblank(&adp->crtc);
+ spin_lock(&adp->crtc.dev->event_lock);
+ if (adp->event) {
+ int_ctl = readl(adp->fe + ADP_CTRL);
+ if ((int_ctl & 0xF00) == 0x600) {
+ drm_crtc_send_vblank_event(&adp->crtc, adp->event);
+ adp->event = NULL;
+ drm_crtc_vblank_put(&adp->crtc);
+ }
+ }
+ spin_unlock(&adp->crtc.dev->event_lock);
+ }
+
+ writel(int_status, adp->fe + ADP_INT_STATUS);
+
+
+ return IRQ_HANDLED;
+}
+
+static int adp_drm_bind(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct adp_drv_private *adp = to_adp(drm);
+ int err;
+
+ writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL);
+
+ adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0);
+ if (IS_ERR(adp->next_bridge)) {
+ dev_err(dev, "failed to find next bridge");
+ return PTR_ERR(adp->next_bridge);
+ }
+
+ err = adp_setup_mode_config(adp);
+ if (err < 0)
+ return err;
+
+ err = request_irq(adp->fe_irq, adp_fe_irq, 0, "adp-fe", adp);
+ if (err)
+ return err;
+
+ err = drm_dev_register(&adp->drm, 0);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void adp_drm_unbind(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct adp_drv_private *adp = to_adp(drm);
+
+ drm_dev_unregister(drm);
+ drm_atomic_helper_shutdown(drm);
+ free_irq(adp->fe_irq, adp);
+}
+
+static const struct component_master_ops adp_master_ops = {
+ .bind = adp_drm_bind,
+ .unbind = adp_drm_unbind,
+};
+
+static int compare_dev(struct device *dev, void *data)
+{
+ return dev->of_node == data;
+}
+
+static int adp_probe(struct platform_device *pdev)
+{
+ struct device_node *port;
+ struct component_match *match = NULL;
+ struct adp_drv_private *adp;
+ int err;
+
+ adp = devm_drm_dev_alloc(&pdev->dev, &adp_driver, struct adp_drv_private, drm);
+ if (IS_ERR(adp))
+ return PTR_ERR(adp);
+
+ dev_set_drvdata(&pdev->dev, &adp->drm);
+
+ err = adp_parse_of(pdev, adp);
+ if (err < 0)
+ return err;
+
+ port = of_graph_get_remote_node(pdev->dev.of_node, 0, 0);
+ if (!port)
+ return -ENODEV;
+
+ drm_of_component_match_add(&pdev->dev, &match, compare_dev, port);
+ of_node_put(port);
+
+ return component_master_add_with_match(&pdev->dev, &adp_master_ops, match);
+}
+
+static void adp_remove(struct platform_device *pdev)
+{
+ component_master_del(&pdev->dev, &adp_master_ops);
+ dev_set_drvdata(&pdev->dev, NULL);
+}
+
+static const struct of_device_id adp_of_match[] = {
+ { .compatible = "apple,h7-display-pipe", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, adp_of_match);
+
+static struct platform_driver adp_platform_driver = {
+ .driver = {
+ .name = "adp",
+ .of_match_table = adp_of_match,
+ },
+ .probe = adp_probe,
+ .remove = adp_remove,
+};
+
+module_platform_driver(adp_platform_driver);
+
+MODULE_DESCRIPTION("Apple Display Pipe DRM driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
index feab8eb7f2a8..b26710cae801 100644
--- a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
@@ -19,7 +19,7 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
-*/
+ */
#ifndef _ACP_GFX_IF_H
#define _ACP_GFX_IF_H
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9375e7f12420..7f515be5185d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -1,47 +1,107 @@
# SPDX-License-Identifier: MIT
+
+config DRM_AMDGPU
+ tristate "AMD GPU"
+ depends on DRM && PCI
+ depends on !UML
+ select FW_LOADER
+ select DRM_CLIENT
+ select DRM_CLIENT_SELECTION
+ select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_DSC_HELPER
+ select DRM_DISPLAY_HDMI_HELPER
+ select DRM_DISPLAY_HDCP_HELPER
+ select DRM_DISPLAY_HELPER
+ select DRM_KMS_HELPER
+ select DRM_SCHED
+ select DRM_TTM
+ select DRM_TTM_HELPER
+ select POWER_SUPPLY
+ select HWMON
+ select I2C
+ select I2C_ALGOBIT
+ select CRC16
+ select BACKLIGHT_CLASS_DEVICE
+ select INTERVAL_TREE
+ select DRM_BUDDY
+ select DRM_SUBALLOC_HELPER
+ select DRM_EXEC
+ select DRM_PANEL_BACKLIGHT_QUIRKS
+ # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
+ # ACPI_VIDEO's dependencies must also be selected.
+ select INPUT if ACPI
+ select ACPI_VIDEO if ACPI
+ # On x86 ACPI_VIDEO also needs ACPI_WMI
+ select X86_PLATFORM_DEVICES if ACPI && X86
+ select ACPI_WMI if ACPI && X86
+ help
+ Choose this option if you have a recent AMD Radeon graphics card.
+
+ If M is selected, the module will be called amdgpu.
+
config DRM_AMDGPU_SI
bool "Enable amdgpu support for SI parts"
depends on DRM_AMDGPU
help
- Choose this option if you want to enable experimental support
- for SI asics.
+ Choose this option if you want to enable support
+ for SI (Southern Islands) asics.
- SI is already supported in radeon. Experimental support for SI
- in amdgpu will be disabled by default and is still provided by
- radeon. Use module options to override this:
+ SI (Southern Islands) are first generation GCN GPUs,
+ supported by both drivers: radeon (old) and amdgpu (new).
+ By default, SI dedicated GPUs are supported by amdgpu.
- radeon.si_support=0 amdgpu.si_support=1
+ Use module options to override this:
+ To use radeon for SI,
+ radeon.si_support=1 amdgpu.si_support=0
config DRM_AMDGPU_CIK
bool "Enable amdgpu support for CIK parts"
depends on DRM_AMDGPU
help
- Choose this option if you want to enable support for CIK asics.
+ Choose this option if you want to enable support for CIK (Sea
+ Islands) asics.
- CIK is already supported in radeon. Support for CIK in amdgpu
- will be disabled by default and is still provided by radeon.
- Use module options to override this:
+ CIK (Sea Islands) are second generation GCN GPUs,
+ supported by both drivers: radeon (old) and amdgpu (new).
+ By default,
+ CIK dedicated GPUs are supported by amdgpu
+ CIK APUs are supported by radeon
+ Use module options to override this:
+ To use amdgpu for CIK,
radeon.cik_support=0 amdgpu.cik_support=1
+ To use radeon for CIK,
+ radeon.cik_support=1 amdgpu.cik_support=0
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- depends on MMU
select HMM_MIRROR
select MMU_NOTIFIER
help
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enabled full userptr support.
-config DRM_AMDGPU_GART_DEBUGFS
- bool "Allow GART access through debugfs"
+config DRM_AMD_ISP
+ bool "Enable AMD Image Signal Processor IP support"
+ depends on DRM_AMDGPU && ACPI
+ select MFD_CORE
+ select PM_GENERIC_DOMAINS if PM
+ help
+ Choose this option to enable ISP IP support for AMD SOCs.
+ This adds the ISP (Image Signal Processor) IP driver and wires
+ it up into the amdgpu driver. It is required for cameras
+ on APUs which utilize mipi cameras.
+
+config DRM_AMDGPU_WERROR
+ bool "Force the compiler to throw an error instead of a warning when compiling"
depends on DRM_AMDGPU
- depends on DEBUG_FS
+ depends on EXPERT
+ depends on !COMPILE_TEST
default n
help
- Selecting this option creates a debugfs file to inspect the mapped
- pages. Uses more memory for housekeeping, enable only for debugging.
+ Add -Werror to the build flags for amdgpu.ko.
+ Only enable this if you are warning code for amdgpu.ko.
source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 13ebb1f71e49..c88760fb52ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright 2017 Advanced Micro Devices, Inc.
+# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -23,7 +23,7 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-FULL_AMD_PATH=$(srctree)/$(src)/..
+FULL_AMD_PATH=$(src)/..
DISPLAY_FOLDER_NAME=display
FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
@@ -34,29 +34,42 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/acp/include \
-I$(FULL_AMD_DISPLAY_PATH) \
-I$(FULL_AMD_DISPLAY_PATH)/include \
+ -I$(FULL_AMD_DISPLAY_PATH)/modules/inc \
-I$(FULL_AMD_DISPLAY_PATH)/dc \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
- -I$(FULL_AMD_PATH)/amdkfd
+ -I$(FULL_AMD_PATH)/amdkfd \
+ -I$(FULL_AMD_PATH)/ras/ras_mgr
+
+# Locally disable W=1 warnings enabled in drm subsystem Makefile
+subdir-ccflags-y += -Wno-override-init
+subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
amdgpu-y := amdgpu_drv.o
# add KMS driver
-amdgpu-y += amdgpu_device.o amdgpu_kms.o \
+amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
- amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
- amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
+ amdgpu_gem.o amdgpu_ring.o \
+ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
- amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+ amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
- amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
- amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
- amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
+ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
+ amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o amdgpu_hdp.o \
+ amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
- amdgpu_fw_attestation.o amdgpu_securedisplay.o
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o \
+ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
+ amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o
+
+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
@@ -65,29 +78,37 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o \
dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o \
- uvd_v3_1.o
+ uvd_v3_1.o vce_v1_0.o
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
- vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \
- arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \
- nbio_v7_2.o dimgrey_cavefish_reg_init.o hdp_v4_0.o hdp_v5_0.o
+ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
+ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
+ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
+ nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \
+ cyan_skillfish_reg_init.o
# add DF block
amdgpu-y += \
df_v1_7.o \
- df_v3_6.o
+ df_v3_6.o \
+ df_v4_3.o \
+ df_v4_6_2.o \
+ df_v4_15.o
# add GMC block
amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
- gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o
+ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
+ mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
+ mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
+ gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o
# add UMC block
amdgpu-y += \
- umc_v6_1.o umc_v6_0.o umc_v8_7.o
+ umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o
# add IH block
amdgpu-y += \
@@ -98,7 +119,10 @@ amdgpu-y += \
cz_ih.o \
vega10_ih.o \
vega20_ih.o \
- navi10_ih.o
+ navi10_ih.o \
+ ih_v6_0.o \
+ ih_v6_1.o \
+ ih_v7_0.o
# add PSP block
amdgpu-y += \
@@ -106,13 +130,16 @@ amdgpu-y += \
psp_v3_1.o \
psp_v10_0.o \
psp_v11_0.o \
- psp_v12_0.o
+ psp_v11_0_8.o \
+ psp_v12_0.o \
+ psp_v13_0.o \
+ psp_v13_0_4.o \
+ psp_v14_0.o
# add DCE block
amdgpu-y += \
dce_v10_0.o \
- dce_v11_0.o \
- dce_virtual.o
+ amdgpu_vkms.o
# add GFX block
amdgpu-y += \
@@ -121,7 +148,15 @@ amdgpu-y += \
gfx_v8_0.o \
gfx_v9_0.o \
gfx_v9_4.o \
- gfx_v10_0.o
+ gfx_v9_4_2.o \
+ gfx_v9_4_3.o \
+ gfx_v10_0.o \
+ imu_v11_0.o \
+ gfx_v11_0.o \
+ gfx_v11_0_3.o \
+ imu_v11_0_3.o \
+ gfx_v12_0.o \
+ imu_v12_0.o
# add async DMA block
amdgpu-y += \
@@ -129,12 +164,21 @@ amdgpu-y += \
sdma_v2_4.o \
sdma_v3_0.o \
sdma_v4_0.o \
+ sdma_v4_4.o \
+ sdma_v4_4_2.o \
sdma_v5_0.o \
- sdma_v5_2.o
+ sdma_v5_2.o \
+ sdma_v6_0.o \
+ sdma_v7_0.o
# add MES block
amdgpu-y += \
- mes_v10_1.o
+ amdgpu_mes.o \
+ mes_v11_0.o \
+ mes_v12_0.o \
+
+# add GFX userqueue support
+amdgpu-y += mes_userqueue.o
# add UVD block
amdgpu-y += \
@@ -152,31 +196,70 @@ amdgpu-y += \
# add VCN and JPEG block
amdgpu-y += \
amdgpu_vcn.o \
+ vcn_sw_ring.o \
vcn_v1_0.o \
vcn_v2_0.o \
vcn_v2_5.o \
vcn_v3_0.o \
+ vcn_v4_0.o \
+ vcn_v4_0_3.o \
+ vcn_v4_0_5.o \
+ vcn_v5_0_0.o \
+ vcn_v5_0_1.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
jpeg_v2_5.o \
- jpeg_v3_0.o
+ jpeg_v3_0.o \
+ jpeg_v4_0.o \
+ jpeg_v4_0_3.o \
+ jpeg_v4_0_5.o \
+ jpeg_v5_0_0.o \
+ jpeg_v5_0_1.o
+
+# add VPE block
+amdgpu-y += \
+ amdgpu_vpe.o \
+ vpe_v6_1.o
+# add UMSCH block
+amdgpu-y += \
+ amdgpu_umsch_mm.o \
+ umsch_mm_v4_0.o
+
+#
# add ATHUB block
amdgpu-y += \
athub_v1_0.o \
athub_v2_0.o \
- athub_v2_1.o
+ athub_v2_1.o \
+ athub_v3_0.o \
+ athub_v4_1_0.o
# add SMUIO block
amdgpu-y += \
smuio_v9_0.o \
smuio_v11_0.o \
- smuio_v11_0_6.o
+ smuio_v11_0_6.o \
+ smuio_v13_0.o \
+ smuio_v13_0_3.o \
+ smuio_v13_0_6.o \
+ smuio_v14_0_2.o
+
+# add reset block
+amdgpu-y += \
+ amdgpu_reset.o
+
+# add MCA block
+amdgpu-y += \
+ mca_v3_0.o
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
+# add gfx usermode queue
+amdgpu-y += amdgpu_userq.o
+
ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
include $(FULL_AMD_PATH)/amdkfd/Makefile
@@ -187,8 +270,12 @@ amdgpu-y += \
amdgpu_amdkfd_gfx_v8.o \
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_arcturus.o \
+ amdgpu_amdkfd_aldebaran.o \
+ amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
- amdgpu_amdkfd_gfx_v10_3.o
+ amdgpu_amdkfd_gfx_v10_3.o \
+ amdgpu_amdkfd_gfx_v11.o \
+ amdgpu_amdkfd_gfx_v12.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
@@ -215,7 +302,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o
include $(FULL_AMD_PATH)/pm/Makefile
@@ -230,4 +317,17 @@ amdgpu-y += $(AMD_DISPLAY_FILES)
endif
+# add isp block
+ifneq ($(CONFIG_DRM_AMD_ISP),)
+amdgpu-y += \
+ amdgpu_isp.o \
+ isp_v4_1_0.o \
+ isp_v4_1_1.o
+endif
+
+AMD_GPU_RAS_PATH := ../ras
+AMD_GPU_RAS_FULL_PATH := $(FULL_AMD_PATH)/ras
+include $(AMD_GPU_RAS_FULL_PATH)/Makefile
+amdgpu-y += $(AMD_GPU_RAS_FILES)
+
obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
index 5b393622f592..a0f0a17e224f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h
+++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
@@ -119,6 +119,7 @@
#define CONNECTOR_OBJECT_ID_eDP 0x14
#define CONNECTOR_OBJECT_ID_MXM 0x15
#define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16
+#define CONNECTOR_OBJECT_ID_USBC 0x17
/* deleted */
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
new file mode 100644
index 000000000000..daa7b23bc775
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "aldebaran.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_xgmi.h"
+
+static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ adev->gmc.xgmi.connected_to_cpu))
+ return true;
+
+ return false;
+}
+
+static struct amdgpu_reset_handler *
+aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ if (reset_context->method == AMD_RESET_METHOD_NONE) {
+ if (aldebaran_is_mode2_default(reset_ctl))
+ reset_context->method = AMD_RESET_METHOD_MODE2;
+ else
+ reset_context->method = amdgpu_asic_reset_method(adev);
+ }
+
+ if (reset_context->method != AMD_RESET_METHOD_NONE) {
+ dev_dbg(adev->dev, "Getting reset handler for method %d\n",
+ reset_context->method);
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_context->method)
+ return handler;
+ }
+ }
+
+ dev_dbg(adev->dev, "Reset handler not found!\n");
+
+ return NULL;
+}
+
+static inline uint32_t aldebaran_get_ip_block_mask(struct amdgpu_device *adev)
+{
+ uint32_t ip_block_mask = BIT(AMD_IP_BLOCK_TYPE_GFX) |
+ BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
+ if (adev->aid_mask)
+ ip_block_mask |= BIT(AMD_IP_BLOCK_TYPE_IH);
+
+ return ip_block_mask;
+}
+
+static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+ uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
+ uint32_t ip_block;
+ int r, i;
+
+ /* Skip suspend of SDMA IP versions >= 4.4.2. They are multi-aid */
+ if (adev->aid_mask)
+ ip_block_mask &= ~BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ ip_block = BIT(adev->ip_blocks[i].version->type);
+ if (!(ip_block_mask & ip_block))
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int
+aldebaran_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ dev_dbg(adev->dev, "Aldebaran prepare hw context\n");
+ /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
+ if (!amdgpu_sriov_vf(adev))
+ r = aldebaran_mode2_suspend_ip(adev);
+
+ return r;
+}
+
+static void aldebaran_async_reset(struct work_struct *work)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_reset_control *reset_ctl =
+ container_of(work, struct amdgpu_reset_control, reset_work);
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_ctl->active_reset) {
+ dev_dbg(adev->dev, "Resetting device\n");
+ handler->do_reset(adev);
+ break;
+ }
+ }
+}
+
+static int aldebaran_mode2_reset(struct amdgpu_device *adev)
+{
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ adev->asic_reset_res = amdgpu_dpm_mode2_reset(adev);
+ return adev->asic_reset_res;
+}
+
+static int
+aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r = 0;
+
+ dev_dbg(adev->dev, "aldebaran perform hw reset\n");
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ reset_context->hive == NULL) {
+ /* Wrong context, return error */
+ return -EINVAL;
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
+ }
+ /*
+ * Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
+ * them together so that they can be completed asynchronously on multiple nodes
+ */
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!queue_work(system_unbound_wq,
+ &tmp_adev->reset_cntl->reset_work))
+ r = -EALREADY;
+ } else
+ r = aldebaran_mode2_reset(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ break;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->reset_cntl->reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+ }
+
+ return r;
+}
+
+static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
+{
+ struct amdgpu_firmware_info *ucode_list[AMDGPU_UCODE_ID_MAXIMUM];
+ uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
+ struct amdgpu_firmware_info *ucode;
+ struct amdgpu_ip_block *cmn_block;
+ struct amdgpu_ip_block *ih_block;
+ int ucode_count = 0;
+ int i, r;
+
+ dev_dbg(adev->dev, "Reloading ucodes after reset\n");
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+ if (!ucode->fw)
+ continue;
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ case AMDGPU_UCODE_ID_SDMA1:
+ case AMDGPU_UCODE_ID_SDMA2:
+ case AMDGPU_UCODE_ID_SDMA3:
+ case AMDGPU_UCODE_ID_SDMA4:
+ case AMDGPU_UCODE_ID_SDMA5:
+ case AMDGPU_UCODE_ID_SDMA6:
+ case AMDGPU_UCODE_ID_SDMA7:
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ case AMDGPU_UCODE_ID_RLC_G:
+ ucode_list[ucode_count++] = ucode;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Reinit NBIF block */
+ cmn_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_COMMON);
+ if (unlikely(!cmn_block)) {
+ dev_err(adev->dev, "Failed to get BIF handle\n");
+ return -EINVAL;
+ }
+ r = amdgpu_ip_block_resume(cmn_block);
+ if (r)
+ return r;
+
+ if (ip_block_mask & BIT(AMD_IP_BLOCK_TYPE_IH)) {
+ ih_block = amdgpu_device_ip_get_ip_block(adev,
+ AMD_IP_BLOCK_TYPE_IH);
+ if (unlikely(!ih_block)) {
+ dev_err(adev->dev, "Failed to get IH handle\n");
+ return -EINVAL;
+ }
+ r = amdgpu_ip_block_resume(ih_block);
+ if (r)
+ return r;
+ }
+
+ /* Reinit GFXHUB */
+ adev->gfxhub.funcs->init(adev);
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r) {
+ dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
+ return r;
+ }
+
+ /* Reload GFX firmware */
+ r = psp_load_fw_list(&adev->psp, ucode_list, ucode_count);
+ if (r) {
+ dev_err(adev->dev, "GFX ucode load failed after reset\n");
+ return r;
+ }
+
+ /* Resume RLC, FW needs RLC alive to complete reset process */
+ adev->gfx.rlc.funcs->resume(adev);
+
+ /* Wait for FW reset event complete */
+ r = amdgpu_dpm_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to get response from firmware after reset\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_COMMON))
+ continue;
+
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(
+ &adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d after reset\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ return r;
+}
+
+static int
+aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_ras *con;
+ int r;
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
+ if (amdgpu_ip_version(reset_context->reset_req_dev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 2) &&
+ reset_context->hive == NULL) {
+ /* Wrong context, return error */
+ return -EINVAL;
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+ dev_info(tmp_adev->dev,
+ "GPU reset succeeded, trying to resume\n");
+ /*TBD: Ideally should clear only GFX, SDMA blocks*/
+ amdgpu_ras_clear_err_state(tmp_adev);
+ r = aldebaran_mode2_restore_ip(tmp_adev);
+ if (r)
+ goto end;
+
+ /*
+ * Add this ASIC as tracked as reset was already
+ * complete successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ /* Resume RAS, ecc_irq */
+ con = amdgpu_ras_get_context(tmp_adev);
+ if (!amdgpu_sriov_vf(tmp_adev) && con) {
+ if (tmp_adev->sdma.ras &&
+ tmp_adev->sdma.ras->ras_block.ras_late_init) {
+ r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->sdma.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+
+ if (tmp_adev->gfx.ras &&
+ tmp_adev->gfx.ras->ras_block.ras_late_init) {
+ r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->gfx.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+ }
+
+ amdgpu_ras_resume(tmp_adev);
+
+ /* Update PSP FW topology after reset */
+ if (reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(reset_context->hive,
+ tmp_adev);
+
+ if (!r) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ tmp_adev->asic_reset_res = r;
+ goto end;
+ }
+ }
+ }
+
+end:
+ return r;
+}
+
+static struct amdgpu_reset_handler aldebaran_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = aldebaran_mode2_prepare_hwcontext,
+ .perform_reset = aldebaran_mode2_perform_reset,
+ .restore_hwcontext = aldebaran_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = aldebaran_mode2_reset,
+};
+
+static struct amdgpu_reset_handler
+ *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &aldebaran_mode2_handler,
+ &xgmi_reset_on_init_handler,
+ };
+
+int aldebaran_reset_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_reset_control *reset_ctl;
+
+ reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
+ if (!reset_ctl)
+ return -ENOMEM;
+
+ reset_ctl->handle = adev;
+ reset_ctl->async_reset = aldebaran_async_reset;
+ reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
+ reset_ctl->get_reset_handler = aldebaran_get_reset_handler;
+
+ INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
+ /* Only mode2 is handled through reset control now */
+ reset_ctl->reset_handlers = &aldebaran_rst_handlers;
+
+ adev->reset_cntl = reset_ctl;
+
+ return 0;
+}
+
+int aldebaran_reset_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->reset_cntl);
+ adev->reset_cntl = NULL;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.h b/drivers/gpu/drm/amd/amdgpu/aldebaran.h
new file mode 100644
index 000000000000..a07db5454d49
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ALDEBARAN_H__
+#define __ALDEBARAN_H__
+
+#include "amdgpu.h"
+
+int aldebaran_reset_init(struct amdgpu_device *adev);
+int aldebaran_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c
new file mode 100644
index 000000000000..28e6c9ab8767
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "aldebaran_ip_offset.h"
+
+int aldebaran_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the block needed by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i]));
+ adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b6879d97c9c9..9f9774f58ce1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -50,23 +50,20 @@
#include <linux/hashtable.h>
#include <linux/dma-fence.h>
#include <linux/pci.h>
-#include <linux/aer.h>
-#include <drm/ttm/ttm_bo_api.h>
-#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_placement.h>
-#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_gem.h>
#include <drm/drm_ioctl.h>
-#include <drm/gpu_scheduler.h>
#include <kgd_kfd_interface.h>
#include "dm_pp_interface.h"
#include "kgd_pp_interface.h"
#include "amd_shared.h"
+#include "amdgpu_utils.h"
#include "amdgpu_mode.h"
#include "amdgpu_ih.h"
#include "amdgpu_irq.h"
@@ -83,15 +80,18 @@
#include "amdgpu_vce.h"
#include "amdgpu_vcn.h"
#include "amdgpu_jpeg.h"
-#include "amdgpu_mn.h"
+#include "amdgpu_vpe.h"
+#include "amdgpu_umsch_mm.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
+#include "amdgpu_lsdma.h"
#include "amdgpu_nbio.h"
#include "amdgpu_hdp.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_csa.h"
+#include "amdgpu_mes_ctx.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
#include "amdgpu_job.h"
@@ -99,7 +99,6 @@
#include "amdgpu_gem.h"
#include "amdgpu_doorbell.h"
#include "amdgpu_amdkfd.h"
-#include "amdgpu_smu.h"
#include "amdgpu_discovery.h"
#include "amdgpu_mes.h"
#include "amdgpu_umc.h"
@@ -107,18 +106,30 @@
#include "amdgpu_gfxhub.h"
#include "amdgpu_df.h"
#include "amdgpu_smuio.h"
-#include "amdgpu_hdp.h"
+#include "amdgpu_fdinfo.h"
+#include "amdgpu_mca.h"
+#include "amdgpu_aca.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_cper.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_seq64.h"
+#include "amdgpu_reg_state.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_eviction_fence.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
-#define MAX_GPU_INSTANCE 16
+#define MAX_GPU_INSTANCE 64
-struct amdgpu_gpu_instance
-{
+#define GFX_SLICE_PERIOD_MS 250
+
+struct amdgpu_gpu_instance {
struct amdgpu_device *adev;
int mgpu_fan_enabled;
};
-struct amdgpu_mgpu_info
-{
+struct amdgpu_mgpu_info {
struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE];
struct mutex mutex;
uint32_t num_gpu;
@@ -126,19 +137,37 @@ struct amdgpu_mgpu_info
uint32_t num_apu;
};
+enum amdgpu_ss {
+ AMDGPU_SS_DRV_LOAD,
+ AMDGPU_SS_DEV_D0,
+ AMDGPU_SS_DEV_D3,
+ AMDGPU_SS_DRV_UNLOAD
+};
+
+struct amdgpu_hwip_reg_entry {
+ u32 hwip;
+ u32 inst;
+ u32 seg;
+ u32 reg_offset;
+ const char *reg_name;
+};
+
+struct amdgpu_watchdog_timer {
+ bool timeout_fatal_disable;
+ uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
+};
+
#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256
/*
* Modules parameters.
*/
extern int amdgpu_modeset;
-extern int amdgpu_vram_limit;
+extern unsigned int amdgpu_vram_limit;
extern int amdgpu_vis_vram_limit;
extern int amdgpu_gart_size;
extern int amdgpu_gtt_size;
extern int amdgpu_moverate;
-extern int amdgpu_benchmarking;
-extern int amdgpu_testing;
extern int amdgpu_audio;
extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
@@ -164,40 +193,59 @@ extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
extern uint amdgpu_pcie_gen_cap;
extern uint amdgpu_pcie_lane_cap;
-extern uint amdgpu_cg_mask;
+extern u64 amdgpu_cg_mask;
extern uint amdgpu_pg_mask;
extern uint amdgpu_sdma_phase_quantum;
extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern uint amdgpu_pp_feature_mask;
extern uint amdgpu_force_long_training;
-extern int amdgpu_job_hang_limit;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
+extern int amdgpu_smu_pptable_id;
extern uint amdgpu_dc_feature_mask;
+extern uint amdgpu_freesync_vid_mode;
extern uint amdgpu_dc_debug_mask;
-extern uint amdgpu_dm_abm_level;
+extern uint amdgpu_dc_visual_confirm;
+extern int amdgpu_dm_abm_level;
+extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
extern struct amdgpu_mgpu_info mgpu_info;
extern int amdgpu_ras_enable;
extern uint amdgpu_ras_mask;
extern int amdgpu_bad_page_threshold;
+extern bool amdgpu_ignore_bad_page_threshold;
+extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
+extern int amdgpu_mes_kiq;
+extern int amdgpu_uni_mes;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
+extern int amdgpu_smartshift_bias;
+extern int amdgpu_use_xgmi_p2p;
+extern int amdgpu_mtype_local;
+extern int amdgpu_enforce_isolation;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
extern bool no_system_mem_limit;
+extern int halt_if_hws_hang;
+extern uint amdgpu_svm_default_granularity;
#else
static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
static const bool __maybe_unused debug_evictions; /* = false */
static const bool __maybe_unused no_system_mem_limit;
+static const int __maybe_unused halt_if_hws_hang;
+#endif
+#ifdef CONFIG_HSA_AMD_P2P
+extern bool pcie_p2p;
#endif
extern int amdgpu_tmz;
@@ -211,9 +259,23 @@ extern int amdgpu_cik_support;
#endif
extern int amdgpu_num_kcq;
+#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
+#define AMDGPU_UMSCHFW_LOG_SIZE (32 * 1024)
+extern int amdgpu_vcnfw_log;
+extern int amdgpu_sg_display;
+extern int amdgpu_umsch_mm;
+extern int amdgpu_seamless;
+extern int amdgpu_umsch_mm_fwlog;
+
+extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
+extern int amdgpu_rebar;
+
+extern int amdgpu_wbrf;
+extern int amdgpu_user_queue;
+
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
-#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
@@ -243,20 +305,32 @@ extern int amdgpu_num_kcq;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)
+/* reset mask */
+#define AMDGPU_RESET_TYPE_FULL (1 << 0) /* full adapter reset, mode1/mode2/BACO/etc. */
+#define AMDGPU_RESET_TYPE_SOFT_RESET (1 << 1) /* IP level soft reset */
+#define AMDGPU_RESET_TYPE_PER_QUEUE (1 << 2) /* per queue */
+#define AMDGPU_RESET_TYPE_PER_PIPE (1 << 3) /* per pipe */
+
/* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
+/* smart shift bias level limits */
+#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
+#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
+
+/* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */
+#define AMDGPU_SWCTF_EXTRA_DELAY 50
+
+struct amdgpu_xcp_mgr;
struct amdgpu_device;
-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
struct amdgpu_bo_va_mapping;
-struct amdgpu_atif;
struct kfd_vm_fault_info;
struct amdgpu_hive_info;
+struct amdgpu_reset_context;
+struct amdgpu_reset_control;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
@@ -284,7 +358,6 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
-
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
@@ -296,13 +369,18 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state);
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
-bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
-#define AMDGPU_MAX_IP_NUM 16
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
+
+#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM
struct amdgpu_ip_block_status {
bool valid;
@@ -320,12 +398,10 @@ struct amdgpu_ip_block_version {
const struct amd_ip_funcs *funcs;
};
-#define HW_REV(_Major, _Minor, _Rev) \
- ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))
-
struct amdgpu_ip_block {
struct amdgpu_ip_block_status status;
const struct amdgpu_ip_block_version *version;
+ struct amdgpu_device *adev;
};
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
@@ -344,7 +420,9 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
*/
bool amdgpu_get_bios(struct amdgpu_device *adev);
bool amdgpu_read_bios(struct amdgpu_device *adev);
-
+bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes);
+void amdgpu_bios_release(struct amdgpu_device *adev);
/*
* Clocks
*/
@@ -359,7 +437,6 @@ struct amdgpu_clock {
uint32_t default_mclk;
uint32_t default_sclk;
uint32_t default_dispclk;
- uint32_t current_dispclk;
uint32_t dp_extclk;
uint32_t max_pixel_clock;
};
@@ -388,34 +465,13 @@ struct amdgpu_clock {
* alignment).
*/
-#define AMDGPU_SA_NUM_FENCE_LISTS 32
-
struct amdgpu_sa_manager {
- wait_queue_head_t wq;
- struct amdgpu_bo *bo;
- struct list_head *hole;
- struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
- struct list_head olist;
- unsigned size;
- uint64_t gpu_addr;
- void *cpu_ptr;
- uint32_t domain;
- uint32_t align;
-};
-
-/* sub-allocation buffer */
-struct amdgpu_sa_bo {
- struct list_head olist;
- struct list_head flist;
- struct amdgpu_sa_manager *manager;
- unsigned soffset;
- unsigned eoffset;
- struct dma_fence *fence;
+ struct drm_suballoc_manager base;
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ void *cpu_ptr;
};
-int amdgpu_fence_slab_init(void);
-void amdgpu_fence_slab_fini(void);
-
/*
* IRQS.
*/
@@ -429,28 +485,12 @@ struct amdgpu_flip_work {
uint64_t base;
struct drm_pending_vblank_event *event;
struct amdgpu_bo *old_abo;
- struct dma_fence *excl;
unsigned shared_count;
struct dma_fence **shared;
struct dma_fence_cb cb;
bool async;
};
-
-/*
- * CP & rings.
- */
-
-struct amdgpu_ib {
- struct amdgpu_sa_bo *sa_bo;
- uint32_t length_dw;
- uint64_t gpu_addr;
- uint32_t *ptr;
- uint32_t flags;
-};
-
-extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-
/*
* file private structure
*/
@@ -459,97 +499,83 @@ struct amdgpu_fpriv {
struct amdgpu_vm vm;
struct amdgpu_bo_va *prt_va;
struct amdgpu_bo_va *csa_va;
+ struct amdgpu_bo_va *seq64_va;
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
-};
-
-int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
-
-int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size,
- enum amdgpu_ib_pool_type pool,
- struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f);
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
- struct amdgpu_ib *ibs, struct amdgpu_job *job,
- struct dma_fence **f);
-int amdgpu_ib_pool_init(struct amdgpu_device *adev);
-void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
-int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
-/*
- * CS.
- */
-struct amdgpu_cs_chunk {
- uint32_t chunk_id;
- uint32_t length_dw;
- void *kdata;
-};
+ struct amdgpu_userq_mgr userq_mgr;
-struct amdgpu_cs_post_dep {
- struct drm_syncobj *syncobj;
- struct dma_fence_chain *chain;
- u64 point;
-};
+ /* Eviction fence infra */
+ struct amdgpu_eviction_fence_mgr evf_mgr;
-struct amdgpu_cs_parser {
- struct amdgpu_device *adev;
- struct drm_file *filp;
- struct amdgpu_ctx *ctx;
-
- /* chunks */
- unsigned nchunks;
- struct amdgpu_cs_chunk *chunks;
-
- /* scheduler job object */
- struct amdgpu_job *job;
- struct drm_sched_entity *entity;
-
- /* buffer objects */
- struct ww_acquire_ctx ticket;
- struct amdgpu_bo_list *bo_list;
- struct amdgpu_mn *mn;
- struct amdgpu_bo_list_entry vm_pd;
- struct list_head validated;
- struct dma_fence *fence;
- uint64_t bytes_moved_threshold;
- uint64_t bytes_moved_vis_threshold;
- uint64_t bytes_moved;
- uint64_t bytes_moved_vis;
-
- /* user fence */
- struct amdgpu_bo_list_entry uf_entry;
-
- unsigned num_post_deps;
- struct amdgpu_cs_post_dep *post_deps;
+ /** GPU partition selection */
+ uint32_t xcp_id;
};
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
- uint32_t ib_idx, int idx)
-{
- return p->job->ibs[ib_idx].ptr[idx];
-}
-
-static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
- uint32_t ib_idx, int idx,
- uint32_t value)
-{
- p->job->ibs[ib_idx].ptr[idx] = value;
-}
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
/*
* Writeback
*/
-#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
+/**
+ * amdgpu_wb - This struct is used for small GPU memory allocation.
+ *
+ * This struct is used to allocate a small amount of GPU memory that can be
+ * used to shadow certain states into the memory. This is especially useful for
+ * providing easy CPU access to some states without requiring register access
+ * (e.g., if some block is power gated, reading register may be problematic).
+ *
+ * Note: the term writeback was initially used because many of the amdgpu
+ * components had some level of writeback memory, and this struct initially
+ * described those components.
+ */
struct amdgpu_wb {
+
+ /**
+ * @wb_obj:
+ *
+ * Buffer Object used for the writeback memory.
+ */
struct amdgpu_bo *wb_obj;
- volatile uint32_t *wb;
+
+ /**
+ * @wb:
+ *
+ * Pointer to the first writeback slot. In terms of CPU address
+ * this value can be accessed directly by using the offset as an index.
+ * For the GPU address, it is necessary to use gpu_addr and the offset.
+ */
+ uint32_t *wb;
+
+ /**
+ * @gpu_addr:
+ *
+ * Writeback base address in the GPU.
+ */
uint64_t gpu_addr;
- u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */
+
+ /**
+ * @num_wb:
+ *
+ * Number of writeback slots reserved for amdgpu.
+ */
+ u32 num_wb;
+
+ /**
+ * @used:
+ *
+ * Track the writeback slot already used.
+ */
unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+
+ /**
+ * @lock:
+ *
+ * Protects read and write of the used field array.
+ */
+ spinlock_t lock;
};
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
@@ -558,13 +584,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
/*
* Benchmarking
*/
-void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
-
-
-/*
- * Testing
- */
-void amdgpu_test_moves(struct amdgpu_device *adev);
+int amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
/*
* ASIC specific register table accessible by UMD
@@ -574,13 +594,62 @@ struct amdgpu_allowed_register_entry {
bool grbm_indexed;
};
+/**
+ * enum amd_reset_method - Methods for resetting AMD GPU devices
+ *
+ * @AMD_RESET_METHOD_NONE: The device will not be reset.
+ * @AMD_RESET_LEGACY: Method reserved for SI, CIK and VI ASICs.
+ * @AMD_RESET_MODE0: Reset the entire ASIC. Not currently available for the
+ * any device.
+ * @AMD_RESET_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.)
+ * individually. Suitable only for some discrete GPU, not
+ * available for all ASICs.
+ * @AMD_RESET_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs
+ * are reset depends on the ASIC. Notably doesn't reset IPs
+ * shared with the CPU on APUs or the memory controllers (so
+ * VRAM is not lost). Not available on all ASICs.
+ * @AMD_RESET_LINK: Triggers SW-UP link reset on other GPUs
+ * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card
+ * but without powering off the PCI bus. Suitable only for
+ * discrete GPUs.
+ * @AMD_RESET_PCI: Does a full bus reset using core Linux subsystem PCI reset
+ * and does a secondary bus reset or FLR, depending on what the
+ * underlying hardware supports.
+ *
+ * Methods available for AMD GPU driver for resetting the device. Not all
+ * methods are suitable for every device. User can override the method using
+ * module parameter `reset_method`.
+ */
enum amd_reset_method {
+ AMD_RESET_METHOD_NONE = -1,
AMD_RESET_METHOD_LEGACY = 0,
AMD_RESET_METHOD_MODE0,
AMD_RESET_METHOD_MODE1,
AMD_RESET_METHOD_MODE2,
+ AMD_RESET_METHOD_LINK,
AMD_RESET_METHOD_BACO,
AMD_RESET_METHOD_PCI,
+ AMD_RESET_METHOD_ON_INIT,
+};
+
+struct amdgpu_video_codec_info {
+ u32 codec_type;
+ u32 max_width;
+ u32 max_height;
+ u32 max_pixels_per_frame;
+ u32 max_level;
+};
+
+#define codec_info_build(type, width, height, level) \
+ .codec_type = type,\
+ .max_width = width,\
+ .max_height = height,\
+ .max_pixels_per_frame = height * width,\
+ .max_level = level,
+
+struct amdgpu_video_codecs {
+ const u32 codec_count;
+ const struct amdgpu_video_codec_info *codec_array;
};
/*
@@ -622,11 +691,20 @@ struct amdgpu_asic_funcs {
/* PCIe replay counter */
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
/* device supports BACO */
- bool (*supports_baco)(struct amdgpu_device *adev);
+ int (*supports_baco)(struct amdgpu_device *adev);
/* pre asic_init quirks */
void (*pre_asic_init)(struct amdgpu_device *adev);
/* enter/exit umd stable pstate */
int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter);
+ /* query video codecs */
+ int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs);
+ /* encode "> 32bits" smn addressing */
+ u64 (*encode_ext_smn_addressing)(int ext_id);
+
+ ssize_t (*get_reg_state)(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size);
};
/*
@@ -643,27 +721,13 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
/* VRAM scratch page for HDP bug, default vram page */
-struct amdgpu_vram_scratch {
+struct amdgpu_mem_scratch {
struct amdgpu_bo *robj;
- volatile uint32_t *ptr;
+ uint32_t *ptr;
u64 gpu_addr;
};
/*
- * ACPI
- */
-struct amdgpu_atcs_functions {
- bool get_ext_state;
- bool pcie_perf_req;
- bool pcie_dev_rdy;
- bool pcie_bus_width;
-};
-
-struct amdgpu_atcs {
- struct amdgpu_atcs_functions functions;
-};
-
-/*
* CGS
*/
struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev);
@@ -675,15 +739,22 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
+
typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
+typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device*, uint64_t, uint64_t);
+
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
+ struct amdgpu_bo *bo;
};
/* Define the HW IP blocks will be used in driver , add more if necessary */
@@ -698,6 +769,7 @@ enum amd_hw_ip_block_type {
SDMA5_HWIP,
SDMA6_HWIP,
SDMA7_HWIP,
+ LSDMA_HWIP,
MMHUB_HWIP,
ATHUB_HWIP,
NBIO_HWIP,
@@ -706,7 +778,9 @@ enum amd_hw_ip_block_type {
UVD_HWIP,
VCN_HWIP = UVD_HWIP,
JPEG_HWIP = VCN_HWIP,
+ VCN1_HWIP,
VCE_HWIP,
+ VPE_HWIP,
DF_HWIP,
DCE_HWIP,
OSSSYS_HWIP,
@@ -717,10 +791,50 @@ enum amd_hw_ip_block_type {
CLK_HWIP,
UMC_HWIP,
RSMU_HWIP,
+ XGMI_HWIP,
+ DCI_HWIP,
+ PCIE_HWIP,
+ ISP_HWIP,
MAX_HWIP
};
-#define HWIP_MAX_INSTANCE 8
+#define HWIP_MAX_INSTANCE 44
+
+#define HW_ID_MAX 300
+#define IP_VERSION_FULL(mj, mn, rv, var, srev) \
+ (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev))
+#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0)
+#define IP_VERSION_MAJ(ver) ((ver) >> 24)
+#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF)
+#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF)
+#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF)
+#define IP_VERSION_SUBREV(ver) ((ver) & 0xF)
+#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8)
+
+struct amdgpu_ip_map_info {
+ /* Map of logical to actual dev instances/mask */
+ uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
+ int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst);
+ uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask);
+};
+
+enum amdgpu_uid_type {
+ AMDGPU_UID_TYPE_XCD,
+ AMDGPU_UID_TYPE_AID,
+ AMDGPU_UID_TYPE_SOC,
+ AMDGPU_UID_TYPE_MAX
+};
+
+#define AMDGPU_UID_INST_MAX 8 /* max number of instances for each UID type */
+
+struct amdgpu_uid {
+ uint64_t uid[AMDGPU_UID_TYPE_MAX][AMDGPU_UID_INST_MAX];
+ struct amdgpu_device *adev;
+};
struct amd_powerplay {
void *pp_handle;
@@ -766,8 +880,73 @@ struct amd_powerplay {
(rid == 0x01) || \
(rid == 0x10))))
+struct amdgpu_mqd_prop {
+ uint64_t mqd_gpu_addr;
+ uint64_t hqd_base_gpu_addr;
+ uint64_t rptr_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ uint32_t queue_size;
+ bool use_doorbell;
+ uint32_t doorbell_index;
+ uint64_t eop_gpu_addr;
+ uint32_t hqd_pipe_priority;
+ uint32_t hqd_queue_priority;
+ bool allow_tunneling;
+ bool hqd_active;
+ uint64_t shadow_addr;
+ uint64_t gds_bkup_addr;
+ uint64_t csa_addr;
+ uint64_t fence_address;
+ bool tmz_queue;
+ bool kernel_queue;
+};
+
+struct amdgpu_mqd {
+ unsigned mqd_size;
+ int (*init_mqd)(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *p);
+};
+
+struct amdgpu_pcie_reset_ctx {
+ bool in_link_reset;
+ bool occurs_dpc;
+ bool audio_suspended;
+ struct pci_dev *swus;
+ struct pci_saved_state *swus_pcistate;
+ struct pci_saved_state *swds_pcistate;
+};
+
+/*
+ * Custom Init levels could be defined for different situations where a full
+ * initialization of all hardware blocks are not expected. Sample cases are
+ * custom init sequences after resume after S0i3/S3, reset on initialization,
+ * partial reset of blocks etc. Presently, this defines only two levels. Levels
+ * are described in corresponding struct definitions - amdgpu_init_default,
+ * amdgpu_init_minimal_xgmi.
+ */
+enum amdgpu_init_lvl_id {
+ AMDGPU_INIT_LEVEL_DEFAULT,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+};
+
+struct amdgpu_init_level {
+ enum amdgpu_init_lvl_id level;
+ uint32_t hwini_ip_block_mask;
+};
+
#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
+struct amdgpu_reset_domain;
+struct amdgpu_fru_info;
+
+enum amdgpu_enforce_isolation_mode {
+ AMDGPU_ENFORCE_ISOLATION_DISABLE = 0,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE = 1,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2,
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3,
+};
+
struct amdgpu_device {
struct device *dev;
struct pci_dev *pdev;
@@ -777,6 +956,7 @@ struct amdgpu_device {
struct amdgpu_acp acp;
#endif
struct amdgpu_hive_info *hive;
+ struct amdgpu_xcp_mgr *xcp_mgr;
/* ASIC */
enum amd_asic_type asic_type;
uint32_t family;
@@ -790,15 +970,9 @@ struct amdgpu_device {
bool need_swiotlb;
bool accel_working;
struct notifier_block acpi_nb;
+ struct notifier_block pm_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
- struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
- unsigned debugfs_count;
-#if defined(CONFIG_DEBUG_FS)
- struct dentry *debugfs_preempt;
- struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
-#endif
- struct amdgpu_atif *atif;
- struct amdgpu_atcs atcs;
+ struct debugfs_blob_wrapper debugfs_vbios_blob;
struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutex grbm_idx_mutex;
@@ -830,8 +1004,12 @@ struct amdgpu_device {
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
+ amdgpu_rreg_ext_t pcie_rreg_ext;
+ amdgpu_wreg_ext_t pcie_wreg_ext;
amdgpu_rreg64_t pcie_rreg64;
amdgpu_wreg64_t pcie_wreg64;
+ amdgpu_rreg64_ext_t pcie_rreg64_ext;
+ amdgpu_wreg64_ext_t pcie_wreg64_ext;
/* protects concurrent UVD register access */
spinlock_t uvd_ctx_idx_lock;
amdgpu_rreg_t uvd_ctx_rreg;
@@ -852,8 +1030,6 @@ struct amdgpu_device {
spinlock_t audio_endpt_idx_lock;
amdgpu_block_rreg_t audio_endpt_rreg;
amdgpu_block_wreg_t audio_endpt_wreg;
- void __iomem *rio_mem;
- resource_size_t rio_mem_size;
struct amdgpu_doorbell doorbell;
/* clock/pll info */
@@ -865,11 +1041,11 @@ struct amdgpu_device {
dma_addr_t dummy_page_addr;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
- unsigned num_vmhubs;
+ DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS);
/* memory management */
struct amdgpu_mman mman;
- struct amdgpu_vram_scratch vram_scratch;
+ struct amdgpu_mem_scratch mem_scratch;
struct amdgpu_wb wb;
atomic64_t num_bytes_moved;
atomic64_t num_evictions;
@@ -886,21 +1062,28 @@ struct amdgpu_device {
u32 log2_max_MBps;
} mm_stats;
+ /* discovery*/
+ struct amdgpu_discovery_info discovery;
+
/* display */
bool enable_virtual_display;
+ struct amdgpu_vkms_output *amdgpu_vkms_output;
struct amdgpu_mode_info mode_info;
/* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
- struct work_struct hotplug_work;
+ struct delayed_work hotplug_work;
struct amdgpu_irq_src crtc_irq;
struct amdgpu_irq_src vline0_irq;
struct amdgpu_irq_src vupdate_irq;
struct amdgpu_irq_src pageflip_irq;
struct amdgpu_irq_src hpd_irq;
+ struct amdgpu_irq_src dmub_trace_irq;
+ struct amdgpu_irq_src dmub_outbox_irq;
/* rings */
u64 fence_context;
unsigned num_rings;
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+ struct dma_fence __rcu *gang_submit;
bool ib_pool_ready;
struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX];
struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
@@ -910,14 +1093,8 @@ struct amdgpu_device {
/* powerplay */
struct amd_powerplay powerplay;
- bool pp_force_state_enabled;
-
- /* smu */
- struct smu_context smu;
-
- /* dpm */
struct amdgpu_pm pm;
- u32 cg_flags;
+ u64 cg_flags;
u32 pg_flags;
/* nbio */
@@ -941,6 +1118,9 @@ struct amdgpu_device {
/* sdma */
struct amdgpu_sdma sdma;
+ /* lsdma */
+ struct amdgpu_lsdma lsdma;
+
/* uvd */
struct amdgpu_uvd uvd;
@@ -953,6 +1133,13 @@ struct amdgpu_device {
/* jpeg */
struct amdgpu_jpeg jpeg;
+ /* vpe */
+ struct amdgpu_vpe vpe;
+
+ /* umsch */
+ struct amdgpu_umsch_mm umsch_mm;
+ bool enable_umsch_mm;
+
/* firmwares */
struct amdgpu_firmware firmware;
@@ -962,8 +1149,8 @@ struct amdgpu_device {
/* GDS */
struct amdgpu_gds gds;
- /* KFD */
- struct amdgpu_kfd_dev kfd;
+ /* for userq and VM fences */
+ struct amdgpu_seq64 seq64;
/* UMC */
struct amdgpu_umc umc;
@@ -971,14 +1158,45 @@ struct amdgpu_device {
/* display related functionality */
struct amdgpu_display_manager dm;
+#if defined(CONFIG_DRM_AMD_ISP)
+ /* isp */
+ struct amdgpu_isp isp;
+#endif
+
/* mes */
bool enable_mes;
+ bool enable_mes_kiq;
+ bool enable_uni_mes;
struct amdgpu_mes mes;
+ struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
+ const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM];
+
+ /* xarray used to retrieve the user queue fence driver reference
+ * in the EOP interrupt handler to signal the particular user
+ * queue fence.
+ */
+ struct xarray userq_xa;
+ /**
+ * @userq_doorbell_xa: Global user queue map (doorbell index → queue)
+ * Key: doorbell_index (unique global identifier for the queue)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_doorbell_xa;
/* df */
struct amdgpu_df df;
+ /* MCA */
+ struct amdgpu_mca mca;
+
+ /* ACA */
+ struct amdgpu_aca aca;
+
+ /* CPER */
+ struct amdgpu_cper cper;
+
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
+ uint32_t harvest_ip_mask;
int num_ip_blocks;
struct mutex mn_lock;
DECLARE_HASHTABLE(mn_hash, 7);
@@ -990,71 +1208,139 @@ struct amdgpu_device {
/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+ struct amdgpu_ip_map_info ip_map;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
struct amdgpu_virt virt;
- /* link all shadow bo */
- struct list_head shadow_list;
- struct mutex shadow_list_lock;
-
/* record hw reset is performed */
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
/* s3/s4 mask */
bool in_suspend;
- bool in_hibernate;
-
- /*
- * The combination flag in_poweroff_reboot_com used to identify the poweroff
- * and reboot opt in the s0i3 system-wide suspend.
- */
- bool in_poweroff_reboot_com;
+ bool in_s3;
+ bool in_s4;
+ bool in_s0ix;
+ suspend_state_t last_suspend_state;
- atomic_t in_gpu_reset;
enum pp_mp1_state mp1_state;
- struct rw_semaphore reset_sem;
struct amdgpu_doorbell_index doorbell_index;
struct mutex notifier_lock;
int asic_reset_res;
struct work_struct xgmi_reset_work;
+ struct list_head reset_list;
long gfx_timeout;
long sdma_timeout;
long video_timeout;
long compute_timeout;
+ long psp_timeout;
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
/* enable runtime pm on the device */
- bool runpm;
bool in_runpm;
bool has_pr3;
- bool pm_sysfs_en;
bool ucode_sysfs_en;
- /* Chip product information */
- char product_number[16];
- char product_name[32];
- char serial[20];
-
- struct amdgpu_autodump autodump;
-
+ struct amdgpu_fru_info *fru_info;
atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
- uint32_t ras_features;
+ uint32_t ras_hw_enabled;
+ uint32_t ras_enabled;
+ bool ras_default_ecc_enabled;
- bool in_pci_err_recovery;
+ bool no_hw_access;
struct pci_saved_state *pci_state;
+ pci_channel_state_t pci_channel_state;
+
+ struct amdgpu_pcie_reset_ctx pcie_reset_ctx;
+
+ /* Track auto wait count on s_barrier settings */
+ bool barrier_has_auto_waitcnt;
+
+ struct amdgpu_reset_control *reset_cntl;
+ uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
+
+ bool ram_is_direct_mapped;
+
+ struct list_head ras_list;
+
+ struct amdgpu_reset_domain *reset_domain;
+
+ struct mutex benchmark_mutex;
+
+ bool scpm_enabled;
+ uint32_t scpm_status;
+
+ struct work_struct reset_work;
+
+ bool dc_enabled;
+ /* Mask of active clusters */
+ uint32_t aid_mask;
+
+ /* Debug */
+ bool debug_vm;
+ bool debug_largebar;
+ bool debug_disable_soft_recovery;
+ bool debug_use_vram_fw_buf;
+ bool debug_enable_ras_aca;
+ bool debug_exp_resets;
+ bool debug_disable_gpu_ring_reset;
+ bool debug_vm_userptr;
+ bool debug_disable_ce_logs;
+ bool debug_enable_ce_cs;
+
+ /* Protection for the following isolation structure */
+ struct mutex enforce_isolation_mutex;
+ enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP];
+ struct amdgpu_isolation {
+ void *owner;
+ struct dma_fence *spearhead;
+ struct amdgpu_sync active;
+ struct amdgpu_sync prev;
+ } isolation[MAX_XCP];
+
+ struct amdgpu_init_level *init_lvl;
+
+ /* This flag is used to determine how VRAM allocations are handled for APUs
+ * in KFD: VRAM or GTT.
+ */
+ bool apu_prefer_gtt;
+
+ bool userq_halt_for_enforce_isolation;
+ struct work_struct userq_reset_work;
+ struct amdgpu_uid *uid_info;
+
+ /* KFD
+ * Must be last --ends in a flexible-array member.
+ */
+ struct amdgpu_kfd_dev kfd;
};
+static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
+ uint8_t ip, uint8_t inst)
+{
+ /* This considers only major/minor/rev and ignores
+ * subrevision/variant fields.
+ */
+ return adev->ip_versions[ip][inst] & ~0xFFU;
+}
+
+static inline uint32_t amdgpu_ip_version_full(const struct amdgpu_device *adev,
+ uint8_t ip, uint8_t inst)
+{
+ /* This returns full version - major/minor/rev/variant/subrevision */
+ return adev->ip_versions[ip][inst];
+}
+
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
{
return container_of(ddev, struct amdgpu_device, ddev);
@@ -1065,59 +1351,94 @@ static inline struct drm_device *adev_to_drm(struct amdgpu_device *adev)
return &adev->ddev;
}
-static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
+static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev)
{
return container_of(bdev, struct amdgpu_device, mman.bdev);
}
+static inline bool amdgpu_is_multi_aid(struct amdgpu_device *adev)
+{
+ return !!adev->aid_mask;
+}
+
int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags);
-void amdgpu_device_fini(struct amdgpu_device *adev);
+void amdgpu_device_fini_hw(struct amdgpu_device *adev);
+void amdgpu_device_fini_sw(struct amdgpu_device *adev);
+
int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
+void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
- uint32_t *buf, size_t size, bool write);
+ void *buf, size_t size, bool write);
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask);
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags,
+ uint32_t xcc_id);
void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags,
+ uint32_t xcc_id);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v);
+ uint32_t reg, uint32_t v, uint32_t xcc_id);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
-u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
-void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
-
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr);
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr);
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr, u32 reg_data);
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr, u64 reg_data);
-
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data);
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev);
+bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
+ enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev);
+
+int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context);
+
int emu_soc_asic_init(struct amdgpu_device *adev);
/*
* Registers read & write functions.
*/
#define AMDGPU_REGS_NO_KIQ (1<<1)
+#define AMDGPU_REGS_RLC (1<<2)
#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1127,12 +1448,18 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
+#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg))
+#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
+#define RREG64_PCIE_EXT(reg) adev->pcie_rreg64_ext(adev, (reg))
+#define WREG64_PCIE_EXT(reg, v) adev->pcie_wreg64_ext(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
@@ -1171,8 +1498,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
} while (0)
#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false))
-#define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
-#define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))
#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
@@ -1190,6 +1515,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_FIELD_OFFSET(reg, offset, field, val) \
WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l))
/*
* BIOS helpers.
*/
@@ -1200,7 +1526,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
/*
* ASICs macro.
*/
-#define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
+#define amdgpu_asic_set_vga_state(adev, state) \
+ ((adev)->asic_funcs->set_vga_state ? (adev)->asic_funcs->set_vga_state((adev), (state)) : 0)
#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
@@ -1213,10 +1540,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
-#define amdgpu_asic_flush_hdp(adev, r) \
- ((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r)))
-#define amdgpu_asic_invalidate_hdp(adev, r) \
- ((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : (adev)->hdp.funcs->invalidate_hdp((adev), (r)))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
@@ -1226,17 +1549,26 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_pre_asic_init(adev) (adev)->asic_funcs->pre_asic_init((adev))
#define amdgpu_asic_update_umd_stable_pstate(adev, enter) \
((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0)
+#define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c))
+
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter))
-#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
+#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i))
+#define for_each_inst(i, inst_mask) \
+ for (i = ffs(inst_mask); i-- != 0; \
+ i = ffs(inst_mask & BIT_MASK_UPPER(i + 1)))
/* Common functions */
bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job* job);
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
+bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
@@ -1245,13 +1577,38 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
const u32 array_size);
-bool amdgpu_device_supports_atpx(struct drm_device *dev);
-bool amdgpu_device_supports_boco(struct drm_device *dev);
-bool amdgpu_device_supports_baco(struct drm_device *dev);
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
+int amdgpu_device_link_reset(struct amdgpu_device *adev);
+bool amdgpu_device_supports_atpx(struct amdgpu_device *adev);
+bool amdgpu_device_supports_px(struct amdgpu_device *adev);
+bool amdgpu_device_supports_boco(struct amdgpu_device *adev);
+bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev);
+int amdgpu_device_supports_baco(struct amdgpu_device *adev);
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
-int amdgpu_device_baco_enter(struct drm_device *dev);
-int amdgpu_device_baco_exit(struct drm_device *dev);
+int amdgpu_device_baco_enter(struct amdgpu_device *adev);
+int amdgpu_device_baco_exit(struct amdgpu_device *adev);
+
+void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+
+void amdgpu_device_halt(struct amdgpu_device *adev);
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
+ u32 reg);
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
+ u32 reg, u32 v);
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
@@ -1259,23 +1616,15 @@ void amdgpu_register_atpx_handler(void);
void amdgpu_unregister_atpx_handler(void);
bool amdgpu_has_atpx_dgpu_power_cntl(void);
bool amdgpu_is_atpx_hybrid(void);
-bool amdgpu_atpx_dgpu_req_power_for_displays(void);
bool amdgpu_has_atpx(void);
#else
static inline void amdgpu_register_atpx_handler(void) {}
static inline void amdgpu_unregister_atpx_handler(void) {}
static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
-static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
static inline bool amdgpu_has_atpx(void) { return false; }
#endif
-#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI)
-void *amdgpu_atpx_get_dhandle(void);
-#else
-static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; }
-#endif
-
/*
* KMS
*/
@@ -1284,18 +1633,18 @@ extern const int amdgpu_max_kms_ioctl;
int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags);
void amdgpu_driver_unload_kms(struct drm_device *dev);
-void amdgpu_driver_lastclose_kms(struct drm_device *dev);
int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
+int amdgpu_device_prepare(struct drm_device *dev);
+void amdgpu_device_complete(struct drm_device *dev);
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
-long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
- unsigned long arg);
int amdgpu_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
@@ -1319,33 +1668,79 @@ struct amdgpu_afmt_acr {
struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);
/* amdgpu_acpi.c */
+
+struct amdgpu_numa_info {
+ uint64_t size;
+ int pxm;
+ int nid;
+};
+
+/* ATCS Device/Driver State */
+#define AMDGPU_ATCS_PSC_DEV_STATE_D0 0
+#define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3
+#define AMDGPU_ATCS_PSC_DRV_STATE_OPR 0
+#define AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR 1
+
#if defined(CONFIG_ACPI)
int amdgpu_acpi_init(struct amdgpu_device *adev);
void amdgpu_acpi_fini(struct amdgpu_device *adev);
bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_power_shift_control_supported(void);
int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
u8 perf_req, bool advertise);
+int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state);
+int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
-
-void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
- struct amdgpu_dm_backlight_caps *caps);
-bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev);
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size);
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info);
+
+void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
+void amdgpu_acpi_detect(void);
+void amdgpu_acpi_release(void);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
+static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev,
+ u64 *tmr_offset, u64 *tmr_size)
+{
+ return -EINVAL;
+}
+static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev,
+ int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ return -EINVAL;
+}
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
-static inline bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
+static inline void amdgpu_acpi_detect(void) { }
+static inline void amdgpu_acpi_release(void) { }
+static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
+static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state) { return 0; }
+static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state)
+{
+ return 0;
+}
+static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { }
#endif
-int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
- uint64_t addr, struct amdgpu_bo **bo,
- struct amdgpu_bo_va_mapping **mapping);
-
-#if defined(CONFIG_DRM_AMD_DC)
-int amdgpu_dm_display_resume(struct amdgpu_device *adev );
+#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
#else
-static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
+static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
#endif
+#if defined(CONFIG_DRM_AMD_ISP)
+int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev);
+#endif
void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
@@ -1359,6 +1754,22 @@ void amdgpu_pci_resume(struct pci_dev *pdev);
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev);
bool amdgpu_device_load_pci_state(struct pci_dev *pdev);
+bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev);
+
+int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
+ enum amd_powergating_state state);
+
+static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device *adev)
+{
+ return amdgpu_gpu_recovery != 0 &&
+ adev->gfx_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->compute_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->sdma_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->video_timeout != MAX_SCHEDULE_TIMEOUT;
+}
+
#include "amdgpu_object.h"
static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
@@ -1366,8 +1777,32 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
return adev->gmc.tmz_enabled;
}
-static inline int amdgpu_in_reset(struct amdgpu_device *adev)
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+extern const struct attribute_group amdgpu_flash_attr_group;
+
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl);
+
+static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev)
{
- return atomic_read(&adev->in_gpu_reset);
+ u32 status;
+ int r;
+
+ r = pci_read_config_dword(adev->pdev, PCI_COMMAND, &status);
+ if (r || PCI_POSSIBLE_ERROR(status)) {
+ dev_err(adev->dev, "device lost from bus!");
+ return -ENODEV;
+ }
+
+ return 0;
}
+
+void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst,
+ uint64_t uid);
+uint64_t amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
new file mode 100644
index 000000000000..9b3180449150
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -0,0 +1,984 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/list.h>
+#include "amdgpu.h"
+#include "amdgpu_aca.h"
+#include "amdgpu_ras.h"
+
+#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
+
+typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+
+static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
+ ACA_BANK_HWID(SMU, 0x01, 0x01),
+ ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
+ ACA_BANK_HWID(UMC, 0x96, 0x00),
+};
+
+static void aca_banks_init(struct aca_banks *banks)
+{
+ if (!banks)
+ return;
+
+ memset(banks, 0, sizeof(*banks));
+ INIT_LIST_HEAD(&banks->list);
+}
+
+static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank)
+{
+ struct aca_bank_node *node;
+
+ if (!bank)
+ return -EINVAL;
+
+ node = kvzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ memcpy(&node->bank, bank, sizeof(*bank));
+
+ INIT_LIST_HEAD(&node->node);
+ list_add_tail(&node->node, &banks->list);
+
+ banks->nr_banks++;
+
+ return 0;
+}
+
+static void aca_banks_release(struct aca_banks *banks)
+{
+ struct aca_bank_node *node, *tmp;
+
+ if (list_empty(&banks->list))
+ return;
+
+ list_for_each_entry_safe(node, tmp, &banks->list, node) {
+ list_del(&node->node);
+ kvfree(node);
+ banks->nr_banks--;
+ }
+}
+
+static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+
+ if (!count)
+ return -EINVAL;
+
+ if (!smu_funcs || !smu_funcs->get_valid_aca_count)
+ return -EOPNOTSUPP;
+
+ return smu_funcs->get_valid_aca_count(adev, type, count);
+}
+
+static struct aca_regs_dump {
+ const char *name;
+ int reg_idx;
+} aca_regs[] = {
+ {"CONTROL", ACA_REG_IDX_CTL},
+ {"STATUS", ACA_REG_IDX_STATUS},
+ {"ADDR", ACA_REG_IDX_ADDR},
+ {"MISC", ACA_REG_IDX_MISC0},
+ {"CONFIG", ACA_REG_IDX_CONFIG},
+ {"IPID", ACA_REG_IDX_IPID},
+ {"SYND", ACA_REG_IDX_SYND},
+ {"DESTAT", ACA_REG_IDX_DESTAT},
+ {"DEADDR", ACA_REG_IDX_DEADDR},
+ {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
+};
+
+static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
+ struct ras_query_context *qctx)
+{
+ u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
+ int i;
+
+ if (adev->debug_disable_ce_logs &&
+ bank->smu_err_type == ACA_SMU_TYPE_CE &&
+ !ACA_BANK_ERR_IS_DEFFERED(bank))
+ return;
+
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+ /* plus 1 for output format, e.g: ACA[08/08]: xxxx */
+ for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
+ idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
+
+ if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS]))
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n");
+}
+
+static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
+{
+
+ struct aca_hwip *hwip;
+ int hwid, mcatype;
+ u64 ipid;
+
+ if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
+ return false;
+
+ hwip = &aca_hwid_mcatypes[type];
+ if (!hwip->hwid)
+ return false;
+
+ ipid = bank->regs[ACA_REG_IDX_IPID];
+ hwid = ACA_REG__IPID__HARDWAREID(ipid);
+ mcatype = ACA_REG__IPID__MCATYPE(ipid);
+
+ return hwip->hwid == hwid && hwip->mcatype == mcatype;
+}
+
+static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
+ int start, int count,
+ struct aca_banks *banks, struct ras_query_context *qctx)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+ struct aca_bank bank;
+ int i, max_count, ret;
+
+ if (!count)
+ return 0;
+
+ if (!smu_funcs || !smu_funcs->get_valid_aca_bank)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ max_count = smu_funcs->max_ue_bank_count;
+ break;
+ case ACA_SMU_TYPE_CE:
+ max_count = smu_funcs->max_ce_bank_count;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (start + count > max_count)
+ return -EINVAL;
+
+ count = min_t(int, count, max_count);
+ for (i = 0; i < count; i++) {
+ memset(&bank, 0, sizeof(bank));
+ ret = smu_funcs->get_valid_aca_bank(adev, type, start + i, &bank);
+ if (ret)
+ return ret;
+
+ bank.smu_err_type = type;
+
+ /*
+ * Poison being consumed when injecting a UE while running background workloads,
+ * which are unexpected.
+ */
+ if (type == ACA_SMU_TYPE_UE &&
+ ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
+ !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
+ continue;
+
+ aca_smu_bank_dump(adev, i, count, &bank, qctx);
+
+ ret = aca_banks_add_bank(banks, &bank);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
+{
+ const struct aca_bank_ops *bank_ops = handle->bank_ops;
+
+ /* Parse all deferred errors with UMC aca handle */
+ if (ACA_BANK_ERR_IS_DEFFERED(bank))
+ return handle->hwip == ACA_HWIP_TYPE_UMC;
+
+ if (!aca_bank_hwip_is_matched(bank, handle->hwip))
+ return false;
+
+ if (!bank_ops->aca_bank_is_valid)
+ return true;
+
+ return bank_ops->aca_bank_is_valid(handle, bank, type, handle->data);
+}
+
+static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+ struct aca_bank_error *bank_error;
+
+ bank_error = kvzalloc(sizeof(*bank_error), GFP_KERNEL);
+ if (!bank_error)
+ return NULL;
+
+ INIT_LIST_HEAD(&bank_error->node);
+ memcpy(&bank_error->info, info, sizeof(*info));
+
+ mutex_lock(&aerr->lock);
+ list_add_tail(&bank_error->node, &aerr->list);
+ aerr->nr_errors++;
+ mutex_unlock(&aerr->lock);
+
+ return bank_error;
+}
+
+static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+ struct aca_bank_error *bank_error = NULL;
+ struct aca_bank_info *tmp_info;
+ bool found = false;
+
+ mutex_lock(&aerr->lock);
+ list_for_each_entry(bank_error, &aerr->list, node) {
+ tmp_info = &bank_error->info;
+ if (tmp_info->socket_id == info->socket_id &&
+ tmp_info->die_id == info->die_id) {
+ found = true;
+ goto out_unlock;
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&aerr->lock);
+
+ return found ? bank_error : NULL;
+}
+
+static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error)
+{
+ if (!aerr || !bank_error)
+ return;
+
+ list_del(&bank_error->node);
+ aerr->nr_errors--;
+
+ kvfree(bank_error);
+}
+
+static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+ struct aca_bank_error *bank_error;
+
+ if (!aerr || !info)
+ return NULL;
+
+ bank_error = find_bank_error(aerr, info);
+ if (bank_error)
+ return bank_error;
+
+ return new_bank_error(aerr, info);
+}
+
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count)
+{
+ struct aca_error_cache *error_cache = &handle->error_cache;
+ struct aca_bank_error *bank_error;
+ struct aca_error *aerr;
+
+ if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT)
+ return -EINVAL;
+
+ if (!count)
+ return 0;
+
+ aerr = &error_cache->errors[type];
+ bank_error = get_bank_error(aerr, info);
+ if (!bank_error)
+ return -ENOMEM;
+
+ bank_error->count += count;
+
+ return 0;
+}
+
+static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
+{
+ const struct aca_bank_ops *bank_ops = handle->bank_ops;
+
+ if (!bank)
+ return -EINVAL;
+
+ if (!bank_ops->aca_bank_parser)
+ return -EOPNOTSUPP;
+
+ return bank_ops->aca_bank_parser(handle, bank, type,
+ handle->data);
+}
+
+static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ int ret;
+
+ ret = aca_bank_parser(handle, bank, type);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
+ enum aca_smu_type type, bank_handler_t handler, void *data)
+{
+ struct aca_handle *handle;
+ int ret;
+
+ if (list_empty(&mgr->list))
+ return 0;
+
+ list_for_each_entry(handle, &mgr->list, node) {
+ if (!aca_bank_is_valid(handle, bank, type))
+ continue;
+
+ ret = handler(handle, bank, type, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
+ enum aca_smu_type type, bank_handler_t handler, void *data)
+{
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ int ret;
+
+ if (!mgr || !banks)
+ return -EINVAL;
+
+ /* pre check to avoid unnecessary operations */
+ if (list_empty(&mgr->list) || list_empty(&banks->list))
+ return 0;
+
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+
+ ret = aca_dispatch_bank(mgr, bank, type, handler, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ bool ret = true;
+
+ /*
+ * Because the UE Valid MCA count will only be cleared after reset,
+ * in order to avoid repeated counting of the error count,
+ * the aca bank is only updated once during the gpu recovery stage.
+ */
+ if (type == ACA_SMU_TYPE_UE) {
+ if (amdgpu_ras_intr_triggered())
+ ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0;
+ else
+ atomic_set(&aca->ue_update_flag, 0);
+ }
+
+ return ret;
+}
+
+static void aca_banks_generate_cper(struct amdgpu_device *adev,
+ enum aca_smu_type type,
+ struct aca_banks *banks,
+ int count)
+{
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ int r;
+
+ if (!adev->cper.enabled)
+ return;
+
+ if (!banks || !count) {
+ dev_warn(adev->dev, "fail to generate cper records\n");
+ return;
+ }
+
+ /* UEs must be encoded into separate CPER entries */
+ if (type == ACA_SMU_TYPE_UE) {
+ struct aca_banks de_banks;
+
+ aca_banks_init(&de_banks);
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ r = aca_banks_add_bank(&de_banks, bank);
+ if (r)
+ dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
+ } else {
+ if (amdgpu_cper_generate_ue_record(adev, bank))
+ dev_warn(adev->dev, "fail to generate ue cper records\n");
+ }
+ }
+
+ if (!list_empty(&de_banks.list)) {
+ if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
+ dev_warn(adev->dev, "fail to generate de cper records\n");
+ }
+
+ aca_banks_release(&de_banks);
+ } else {
+ /*
+ * SMU_TYPE_CE banks are combined into 1 CPER entries,
+ * they could be CEs or DEs or both
+ */
+ if (amdgpu_cper_generate_ce_records(adev, banks, count))
+ dev_warn(adev->dev, "fail to generate ce cper records\n");
+ }
+}
+
+static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
+ bank_handler_t handler, struct ras_query_context *qctx, void *data)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ struct aca_banks banks;
+ u32 count = 0;
+ int ret;
+
+ if (list_empty(&aca->mgr.list))
+ return 0;
+
+ if (!aca_bank_should_update(adev, type))
+ return 0;
+
+ ret = aca_smu_get_valid_aca_count(adev, type, &count);
+ if (ret)
+ return ret;
+
+ if (!count)
+ return 0;
+
+ aca_banks_init(&banks);
+
+ ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx);
+ if (ret)
+ goto err_release_banks;
+
+ if (list_empty(&banks.list)) {
+ ret = 0;
+ goto err_release_banks;
+ }
+
+ ret = aca_dispatch_banks(&aca->mgr, &banks, type,
+ handler, data);
+ if (ret)
+ goto err_release_banks;
+
+ aca_banks_generate_cper(adev, type, &banks, count);
+
+err_release_banks:
+ aca_banks_release(&banks);
+
+ return ret;
+}
+
+static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data)
+{
+ struct aca_bank_info *info;
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ u64 count;
+
+ if (type >= ACA_ERROR_TYPE_COUNT)
+ return -EINVAL;
+
+ count = bank_error->count;
+ if (!count)
+ return 0;
+
+ info = &bank_error->info;
+ mcm_info.die_id = info->die_id;
+ mcm_info.socket_id = info->socket_id;
+
+ switch (type) {
+ case ACA_ERROR_TYPE_UE:
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count);
+ break;
+ case ACA_ERROR_TYPE_CE:
+ amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count);
+ break;
+ case ACA_ERROR_TYPE_DEFERRED:
+ amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data)
+{
+ struct aca_error_cache *error_cache = &handle->error_cache;
+ struct aca_error *aerr = &error_cache->errors[type];
+ struct aca_bank_error *bank_error, *tmp;
+
+ mutex_lock(&aerr->lock);
+
+ if (list_empty(&aerr->list))
+ goto out_unlock;
+
+ list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) {
+ aca_log_aca_error_data(bank_error, type, err_data);
+ aca_bank_error_remove(aerr, bank_error);
+ }
+
+out_unlock:
+ mutex_unlock(&aerr->lock);
+
+ return 0;
+}
+
+static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx)
+{
+ enum aca_smu_type smu_type;
+ int ret;
+
+ switch (type) {
+ case ACA_ERROR_TYPE_UE:
+ smu_type = ACA_SMU_TYPE_UE;
+ break;
+ case ACA_ERROR_TYPE_CE:
+ case ACA_ERROR_TYPE_DEFERRED:
+ smu_type = ACA_SMU_TYPE_CE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* update aca bank to aca source error_cache first */
+ ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
+ if (ret)
+ return ret;
+
+ /* DEs may contain in CEs or UEs */
+ if (type != ACA_ERROR_TYPE_DEFERRED)
+ aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
+
+ return aca_log_aca_error(handle, type, err_data);
+}
+
+static bool aca_handle_is_valid(struct aca_handle *handle)
+{
+ if (!handle->mask || !list_empty(&handle->node))
+ return false;
+
+ return true;
+}
+
+int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ if (!handle || !err_data)
+ return -EINVAL;
+
+ if (aca_handle_is_valid(handle))
+ return -EOPNOTSUPP;
+
+ if ((type < 0) || (!(BIT(type) & handle->mask)))
+ return 0;
+
+ return __aca_get_error_data(adev, handle, type, err_data, qctx);
+}
+
+static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
+{
+ mutex_init(&aerr->lock);
+ INIT_LIST_HEAD(&aerr->list);
+ aerr->type = type;
+ aerr->nr_errors = 0;
+}
+
+static void aca_init_error_cache(struct aca_handle *handle)
+{
+ struct aca_error_cache *error_cache = &handle->error_cache;
+ int type;
+
+ for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++)
+ aca_error_init(&error_cache->errors[type], type);
+}
+
+static void aca_error_fini(struct aca_error *aerr)
+{
+ struct aca_bank_error *bank_error, *tmp;
+
+ mutex_lock(&aerr->lock);
+ if (list_empty(&aerr->list))
+ goto out_unlock;
+
+ list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
+ aca_bank_error_remove(aerr, bank_error);
+
+out_unlock:
+ mutex_destroy(&aerr->lock);
+}
+
+static void aca_fini_error_cache(struct aca_handle *handle)
+{
+ struct aca_error_cache *error_cache = &handle->error_cache;
+ int type;
+
+ for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++)
+ aca_error_fini(&error_cache->errors[type]);
+}
+
+static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle,
+ const char *name, const struct aca_info *ras_info, void *data)
+{
+ memset(handle, 0, sizeof(*handle));
+
+ handle->adev = adev;
+ handle->mgr = mgr;
+ handle->name = name;
+ handle->hwip = ras_info->hwip;
+ handle->mask = ras_info->mask;
+ handle->bank_ops = ras_info->bank_ops;
+ handle->data = data;
+ aca_init_error_cache(handle);
+
+ INIT_LIST_HEAD(&handle->node);
+ list_add_tail(&handle->node, &mgr->list);
+ mgr->nr_handles++;
+
+ return 0;
+}
+
+static ssize_t aca_sysfs_read(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct aca_handle *handle = container_of(attr, struct aca_handle, aca_attr);
+
+ /* NOTE: the aca cache will be auto cleared once read,
+ * So the driver should unify the query entry point, forward request to ras query interface directly */
+ return amdgpu_ras_aca_sysfs_read(dev, attr, handle, buf, handle->data);
+}
+
+static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle)
+{
+ struct device_attribute *aca_attr = &handle->aca_attr;
+
+ snprintf(handle->attr_name, sizeof(handle->attr_name) - 1, "aca_%s", handle->name);
+ aca_attr->show = aca_sysfs_read;
+ aca_attr->attr.name = handle->attr_name;
+ aca_attr->attr.mode = S_IRUGO;
+ sysfs_attr_init(&aca_attr->attr);
+
+ return sysfs_add_file_to_group(&adev->dev->kobj,
+ &aca_attr->attr,
+ "ras");
+}
+
+int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
+ const char *name, const struct aca_info *ras_info, void *data)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ int ret;
+
+ if (!amdgpu_aca_is_enabled(adev))
+ return 0;
+
+ ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data);
+ if (ret)
+ return ret;
+
+ return add_aca_sysfs(adev, handle);
+}
+
+static void remove_aca_handle(struct aca_handle *handle)
+{
+ struct aca_handle_manager *mgr = handle->mgr;
+
+ aca_fini_error_cache(handle);
+ list_del(&handle->node);
+ mgr->nr_handles--;
+}
+
+static void remove_aca_sysfs(struct aca_handle *handle)
+{
+ struct amdgpu_device *adev = handle->adev;
+ struct device_attribute *aca_attr = &handle->aca_attr;
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
+ &aca_attr->attr,
+ "ras");
+}
+
+void amdgpu_aca_remove_handle(struct aca_handle *handle)
+{
+ if (!handle || list_empty(&handle->node))
+ return;
+
+ remove_aca_sysfs(handle);
+ remove_aca_handle(handle);
+}
+
+static int aca_manager_init(struct aca_handle_manager *mgr)
+{
+ INIT_LIST_HEAD(&mgr->list);
+ mgr->nr_handles = 0;
+
+ return 0;
+}
+
+static void aca_manager_fini(struct aca_handle_manager *mgr)
+{
+ struct aca_handle *handle, *tmp;
+
+ if (list_empty(&mgr->list))
+ return;
+
+ list_for_each_entry_safe(handle, tmp, &mgr->list, node)
+ amdgpu_aca_remove_handle(handle);
+}
+
+bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
+{
+ return (adev->aca.is_enabled ||
+ adev->debug_enable_ras_aca);
+}
+
+int amdgpu_aca_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ int ret;
+
+ atomic_set(&aca->ue_update_flag, 0);
+
+ ret = aca_manager_init(&aca->mgr);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+void amdgpu_aca_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ aca_manager_fini(&aca->mgr);
+
+ atomic_set(&aca->ue_update_flag, 0);
+}
+
+int amdgpu_aca_reset(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ atomic_set(&aca->ue_update_flag, 0);
+
+ return 0;
+}
+
+void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ WARN_ON(aca->smu_funcs);
+ aca->smu_funcs = smu_funcs;
+}
+
+int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
+{
+ u64 ipid;
+ u32 instidhi, instidlo;
+
+ if (!bank || !info)
+ return -EINVAL;
+
+ ipid = bank->regs[ACA_REG_IDX_IPID];
+ info->hwid = ACA_REG__IPID__HARDWAREID(ipid);
+ info->mcatype = ACA_REG__IPID__MCATYPE(ipid);
+ /*
+ * Unfied DieID Format: SAASS. A:AID, S:Socket.
+ * Unfied DieID[4:4] = InstanceId[0:0]
+ * Unfied DieID[0:3] = InstanceIdHi[0:3]
+ */
+ instidhi = ACA_REG__IPID__INSTANCEIDHI(ipid);
+ instidlo = ACA_REG__IPID__INSTANCEIDLO(ipid);
+ info->die_id = ((instidhi >> 2) & 0x03);
+ info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03);
+
+ return 0;
+}
+
+static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+
+ if (!smu_funcs || !smu_funcs->parse_error_code)
+ return -EOPNOTSUPP;
+
+ return smu_funcs->parse_error_code(adev, bank);
+}
+
+int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
+{
+ int i, error_code;
+
+ if (!bank || !err_codes)
+ return -EINVAL;
+
+ error_code = aca_bank_get_error_code(adev, bank);
+ if (error_code < 0)
+ return error_code;
+
+ for (i = 0; i < size; i++) {
+ if (err_codes[i] == error_code)
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+
+ if (!smu_funcs || !smu_funcs->set_debug_mode)
+ return -EOPNOTSUPP;
+
+ return smu_funcs->set_debug_mode(adev, en);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ int ret;
+
+ ret = amdgpu_ras_set_aca_debug_mode(adev, val ? true : false);
+ if (ret)
+ return ret;
+
+ dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", val ? "on" : "off");
+
+ return 0;
+}
+
+static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx)
+{
+ struct aca_bank_info info;
+ int i, ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return;
+
+ seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE");
+ seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+ idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
+
+ for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+ seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]);
+}
+
+struct aca_dump_context {
+ struct seq_file *m;
+ int idx;
+};
+
+static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_dump_context *ctx = (struct aca_dump_context *)data;
+
+ aca_dump_entry(ctx->m, bank, type, ctx->idx++);
+
+ return handler_aca_log_bank_error(handle, bank, type, NULL);
+}
+
+static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct aca_dump_context context = {
+ .m = m,
+ .idx = 0,
+ };
+
+ return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context);
+}
+
+static int aca_dump_ce_show(struct seq_file *m, void *unused)
+{
+ return aca_dump_show(m, ACA_SMU_TYPE_CE);
+}
+
+static int aca_dump_ce_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, aca_dump_ce_show, inode->i_private);
+}
+
+static const struct file_operations aca_ce_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = aca_dump_ce_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int aca_dump_ue_show(struct seq_file *m, void *unused)
+{
+ return aca_dump_show(m, ACA_SMU_TYPE_UE);
+}
+
+static int aca_dump_ue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, aca_dump_ue_show, inode->i_private);
+}
+
+static const struct file_operations aca_ue_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = aca_dump_ue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n");
+#endif
+
+void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{
+#if defined(CONFIG_DEBUG_FS)
+ if (!root)
+ return;
+
+ debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);
+ debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops);
+ debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
new file mode 100644
index 000000000000..38c88897e1ec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ACA_H__
+#define __AMDGPU_ACA_H__
+
+#include <linux/list.h>
+
+struct ras_err_data;
+struct ras_query_context;
+
+#define ACA_MAX_REGS_COUNT (16)
+
+#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
+#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63)
+#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62)
+#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61)
+#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60)
+#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59)
+#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58)
+#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57)
+#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56)
+#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55)
+#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53)
+#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46)
+#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45)
+#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44)
+#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43)
+#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40)
+#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32)
+#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24)
+#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16)
+#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0)
+
+#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48)
+#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44)
+#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32)
+#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0)
+
+#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63)
+#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48)
+#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32)
+
+#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0)
+
+/* NOTE: The following codes refers to the smu header file */
+#define ACA_EXTERROR_CODE_CE 0x3a
+#define ACA_EXTERROR_CODE_FAULT 0x3b
+
+#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE)
+#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
+#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
+
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */
+#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
+#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
+#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+
+#define ACA_BANK_ERR_IS_DEFFERED(bank) \
+ (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))
+
+enum aca_reg_idx {
+ ACA_REG_IDX_CTL = 0,
+ ACA_REG_IDX_STATUS = 1,
+ ACA_REG_IDX_ADDR = 2,
+ ACA_REG_IDX_MISC0 = 3,
+ ACA_REG_IDX_CONFIG = 4,
+ ACA_REG_IDX_IPID = 5,
+ ACA_REG_IDX_SYND = 6,
+ ACA_REG_IDX_DESTAT = 8,
+ ACA_REG_IDX_DEADDR = 9,
+ ACA_REG_IDX_CTL_MASK = 10,
+ ACA_REG_IDX_COUNT = 16,
+};
+
+enum aca_hwip_type {
+ ACA_HWIP_TYPE_UNKNOW = -1,
+ ACA_HWIP_TYPE_PSP = 0,
+ ACA_HWIP_TYPE_UMC,
+ ACA_HWIP_TYPE_SMU,
+ ACA_HWIP_TYPE_PCS_XGMI,
+ ACA_HWIP_TYPE_COUNT,
+};
+
+enum aca_error_type {
+ ACA_ERROR_TYPE_INVALID = -1,
+ ACA_ERROR_TYPE_UE = 0,
+ ACA_ERROR_TYPE_CE,
+ ACA_ERROR_TYPE_DEFERRED,
+ ACA_ERROR_TYPE_COUNT
+};
+
+enum aca_smu_type {
+ ACA_SMU_TYPE_INVALID = -1,
+ ACA_SMU_TYPE_UE = 0,
+ ACA_SMU_TYPE_CE,
+ ACA_SMU_TYPE_COUNT,
+};
+
+struct aca_hwip {
+ int hwid;
+ int mcatype;
+};
+
+struct aca_bank {
+ enum aca_error_type aca_err_type;
+ enum aca_smu_type smu_err_type;
+ u64 regs[ACA_MAX_REGS_COUNT];
+};
+
+struct aca_bank_node {
+ struct aca_bank bank;
+ struct list_head node;
+};
+
+struct aca_banks {
+ int nr_banks;
+ struct list_head list;
+};
+
+struct aca_bank_info {
+ int die_id;
+ int socket_id;
+ int hwid;
+ int mcatype;
+};
+
+struct aca_bank_error {
+ struct list_head node;
+ struct aca_bank_info info;
+ u64 count;
+};
+
+struct aca_error {
+ struct list_head list;
+ struct mutex lock;
+ enum aca_error_type type;
+ int nr_errors;
+};
+
+struct aca_handle_manager {
+ struct list_head list;
+ int nr_handles;
+};
+
+struct aca_error_cache {
+ struct aca_error errors[ACA_ERROR_TYPE_COUNT];
+};
+
+struct aca_handle {
+ struct list_head node;
+ enum aca_hwip_type hwip;
+ struct amdgpu_device *adev;
+ struct aca_handle_manager *mgr;
+ struct aca_error_cache error_cache;
+ const struct aca_bank_ops *bank_ops;
+ struct device_attribute aca_attr;
+ char attr_name[64];
+ const char *name;
+ u32 mask;
+ void *data;
+};
+
+struct aca_bank_ops {
+ int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+ bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
+ void *data);
+};
+
+struct aca_smu_funcs {
+ int max_ue_bank_count;
+ int max_ce_bank_count;
+ int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
+ int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
+ int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
+ int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank);
+};
+
+struct amdgpu_aca {
+ struct aca_handle_manager mgr;
+ const struct aca_smu_funcs *smu_funcs;
+ atomic_t ue_update_flag;
+ bool is_enabled;
+};
+
+struct aca_info {
+ enum aca_hwip_type hwip;
+ const struct aca_bank_ops *bank_ops;
+ u32 mask;
+};
+
+int amdgpu_aca_init(struct amdgpu_device *adev);
+void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
+void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
+bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
+
+int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info);
+int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size);
+
+int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
+ const char *name, const struct aca_info *aca_info, void *data);
+void amdgpu_aca_remove_handle(struct aca_handle *handle);
+int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx);
+int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
+void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index b8655ff73a65..381ef205b0df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -29,6 +29,8 @@
#include <linux/platform_device.h>
#include <sound/designware_i2s.h>
#include <sound/pcm.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
#include "amdgpu.h"
#include "atom.h"
@@ -36,6 +38,7 @@
#include "acp_gfx_if.h"
+#define ST_JADEITE 1
#define ACP_TILE_ON_MASK 0x03
#define ACP_TILE_OFF_MASK 0x02
#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f
@@ -85,6 +88,8 @@
#define ACP_DEVS 4
#define ACP_SRC_ID 162
+static unsigned long acp_machine_id;
+
enum {
ACP_TILE_P1 = 0,
ACP_TILE_P2,
@@ -93,9 +98,9 @@ enum {
ACP_TILE_DSP2,
};
-static int acp_sw_init(void *handle)
+static int acp_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->acp.parent = adev->dev;
@@ -107,9 +112,9 @@ static int acp_sw_init(void *handle)
return 0;
}
-static int acp_sw_fini(void *handle)
+static int acp_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->acp.cgs_device)
amdgpu_cgs_destroy_device(adev->acp.cgs_device);
@@ -128,16 +133,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
- if (apd != NULL) {
- adev = apd->adev;
+ adev = apd->adev;
/* call smu to POWER GATE ACP block
* smu will
* 1. turn off the acp clock
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
- }
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
@@ -147,60 +150,93 @@ static int acp_poweron(struct generic_pm_domain *genpd)
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
- if (apd != NULL) {
- adev = apd->adev;
+ adev = apd->adev;
/* call smu to UNGATE ACP block
* smu will
* 1. exit ulv
* 2. turn on acp clock
* 3. power on acp tiles
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
- }
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
-static struct device *get_mfd_cell_dev(const char *device_name, int r)
+static int acp_genpd_add_device(struct device *dev, void *data)
+{
+ struct generic_pm_domain *gpd = data;
+ int ret;
+
+ ret = pm_genpd_add_device(gpd, dev);
+ if (ret)
+ dev_err(dev, "Failed to add dev to genpd %d\n", ret);
+
+ return ret;
+}
+
+static int acp_genpd_remove_device(struct device *dev, void *data)
{
- char auto_dev_name[25];
- struct device *dev;
+ int ret;
- snprintf(auto_dev_name, sizeof(auto_dev_name),
- "%s.%d.auto", device_name, r);
- dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name);
- dev_info(dev, "device %s added to pm domain\n", auto_dev_name);
+ ret = pm_genpd_remove_device(dev);
+ if (ret)
+ dev_err(dev, "Failed to remove dev from genpd %d\n", ret);
- return dev;
+ /* Continue to remove */
+ return 0;
}
+static int acp_quirk_cb(const struct dmi_system_id *id)
+{
+ acp_machine_id = ST_JADEITE;
+ return 1;
+}
+
+static const struct dmi_system_id acp_quirk_table[] = {
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"),
+ }
+ },
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"),
+ },
+ },
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"),
+ },
+ },
+ {}
+};
+
/**
* acp_hw_init - start and test ACP block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int acp_hw_init(void *handle)
+static int acp_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r, i;
- uint64_t acp_base;
+ int r;
+ u64 acp_base;
u32 val = 0;
u32 count = 0;
- struct device *dev;
struct i2s_platform_data *i2s_pdata = NULL;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- const struct amdgpu_ip_block *ip_block =
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
-
- if (!ip_block)
- return -EINVAL;
+ struct amdgpu_device *adev = ip_block->adev;
r = amd_acp_hw_init(adev->acp.cgs_device,
ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */
if (r == -ENODEV) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
} else if (r) {
return r;
@@ -210,146 +246,208 @@ static int acp_hw_init(void *handle)
return -EINVAL;
acp_base = adev->rmmio_base;
-
-
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
- if (adev->acp.acp_genpd == NULL)
+ if (!adev->acp.acp_genpd)
return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
adev->acp.acp_genpd->gpd.power_on = acp_poweron;
-
-
adev->acp.acp_genpd->adev = adev;
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
+ dmi_check_system(acp_quirk_table);
+ switch (acp_machine_id) {
+ case ST_JADEITE:
+ {
+ adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell),
+ GFP_KERNEL);
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
- adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
- GFP_KERNEL);
-
- if (adev->acp.acp_cell == NULL) {
- r = -ENOMEM;
- goto failure;
- }
-
- adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
- if (adev->acp.acp_res == NULL) {
- r = -ENOMEM;
- goto failure;
- }
+ adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
- i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
- if (i2s_pdata == NULL) {
- r = -ENOMEM;
- goto failure;
- }
+ i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
- switch (adev->asic_type) {
- case CHIP_STONEY:
i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dma_irq";
+ adev->acp.acp_res[2].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
+ adev->acp.acp_cell[0].num_resources = 3;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, 2, NULL, 0, NULL);
+ if (r)
+ goto failure;
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
break;
- default:
- i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
}
- i2s_pdata[0].cap = DWC_I2S_PLAY;
- i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
- i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
- switch (adev->asic_type) {
- case CHIP_STONEY:
- i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_COMP_PARAM1 |
- DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
- break;
default:
- i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_COMP_PARAM1;
- }
+ adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
+ GFP_KERNEL);
- i2s_pdata[1].cap = DWC_I2S_RECORD;
- i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
- i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
- i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
- switch (adev->asic_type) {
- case CHIP_STONEY:
- i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
- break;
- default:
- break;
- }
+ adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
- i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
- i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
- i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
-
- adev->acp.acp_res[0].name = "acp2x_dma";
- adev->acp.acp_res[0].flags = IORESOURCE_MEM;
- adev->acp.acp_res[0].start = acp_base;
- adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
-
- adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
- adev->acp.acp_res[1].flags = IORESOURCE_MEM;
- adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
- adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
-
- adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
- adev->acp.acp_res[2].flags = IORESOURCE_MEM;
- adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
- adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
-
- adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
- adev->acp.acp_res[3].flags = IORESOURCE_MEM;
- adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
- adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
-
- adev->acp.acp_res[4].name = "acp2x_dma_irq";
- adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
- adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
- adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
-
- adev->acp.acp_cell[0].name = "acp_audio_dma";
- adev->acp.acp_cell[0].num_resources = 5;
- adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
- adev->acp.acp_cell[0].platform_data = &adev->asic_type;
- adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
-
- adev->acp.acp_cell[1].name = "designware-i2s";
- adev->acp.acp_cell[1].num_resources = 1;
- adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
- adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
- adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
-
- adev->acp.acp_cell[2].name = "designware-i2s";
- adev->acp.acp_cell[2].num_resources = 1;
- adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
- adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
- adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
-
- adev->acp.acp_cell[3].name = "designware-i2s";
- adev->acp.acp_cell[3].num_resources = 1;
- adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
- adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
- adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
-
- r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
- ACP_DEVS);
- if (r)
- goto failure;
-
- for (i = 0; i < ACP_DEVS ; i++) {
- dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
- r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
- if (r) {
- dev_err(dev, "Failed to add dev to genpd\n");
+ i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
goto failure;
}
- }
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ }
+ i2s_pdata[0].cap = DWC_I2S_PLAY;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1 |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1;
+ }
+
+ i2s_pdata[1].cap = DWC_I2S_RECORD;
+ i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ break;
+ }
+
+ i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
+ i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
+ adev->acp.acp_res[2].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
+ adev->acp.acp_res[3].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
+ adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
+
+ adev->acp.acp_res[4].name = "acp2x_dma_irq";
+ adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
+ adev->acp.acp_cell[0].num_resources = 5;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[2].name = "designware-i2s";
+ adev->acp.acp_cell[2].id = 2;
+ adev->acp.acp_cell[2].num_resources = 1;
+ adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
+ adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
+ adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[3].name = "designware-i2s";
+ adev->acp.acp_cell[3].id = 3;
+ adev->acp.acp_cell[3].num_resources = 1;
+ adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
+ adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
+ adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
+
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, ACP_DEVS, NULL, 0, NULL);
+ if (r)
+ goto failure;
+
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
+ }
/* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@@ -405,20 +503,18 @@ failure:
/**
* acp_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int acp_hw_fini(void *handle)
+static int acp_hw_fini(struct amdgpu_ip_block *ip_block)
{
- int i, ret;
u32 val = 0;
u32 count = 0;
- struct device *dev;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* return early if no ACP */
if (!adev->acp.acp_genpd) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -458,13 +554,8 @@ static int acp_hw_fini(void *handle)
udelay(100);
}
- for (i = 0; i < ACP_DEVS ; i++) {
- dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
- ret = pm_genpd_remove_device(dev);
- /* If removal fails, dont giveup and try rest */
- if (ret)
- dev_err(dev, "remove dev from genpd failed\n");
- }
+ device_for_each_child(adev->acp.parent, NULL,
+ acp_genpd_remove_device);
mfd_remove_devices(adev->acp.parent);
kfree(adev->acp.acp_res);
@@ -474,67 +565,50 @@ static int acp_hw_fini(void *handle)
return 0;
}
-static int acp_suspend(void *handle)
+static int acp_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* power up on suspend */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
-static int acp_resume(void *handle)
+static int acp_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* power down again on resume */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
- return 0;
-}
-
-static int acp_early_init(void *handle)
-{
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
-static bool acp_is_idle(void *handle)
+static bool acp_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int acp_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int acp_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int acp_set_clockgating_state(void *handle,
+static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int acp_set_powergating_state(void *handle,
+static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0);
return 0;
}
static const struct amd_ip_funcs acp_ip_funcs = {
.name = "acp_ip",
- .early_init = acp_early_init,
- .late_init = NULL,
.sw_init = acp_sw_init,
.sw_fini = acp_sw_fini,
.hw_init = acp_hw_init,
@@ -542,14 +616,11 @@ static const struct amd_ip_funcs acp_ip_funcs = {
.suspend = acp_suspend,
.resume = acp_resume,
.is_idle = acp_is_idle,
- .wait_for_idle = acp_wait_for_idle,
- .soft_reset = acp_soft_reset,
.set_clockgating_state = acp_set_clockgating_state,
.set_powergating_state = acp_set_powergating_state,
};
-const struct amdgpu_ip_block_version acp_ip_block =
-{
+const struct amdgpu_ip_block_version acp_ip_block = {
.type = AMD_IP_BLOCK_TYPE_ACP,
.major = 2,
.minor = 2,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 36a741d63ddc..d31460a9e958 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
@@ -23,19 +24,60 @@
#include <linux/pci.h>
#include <linux/acpi.h>
+#include <linux/backlight.h>
#include <linux/slab.h>
+#include <linux/xarray.h>
#include <linux/power_supply.h>
#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
#include <acpi/video.h>
#include <acpi/actbl.h>
-#include <drm/drm_crtc_helper.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_display.h"
#include "amd_acpi.h"
#include "atom.h"
+/* Declare GUID for AMD _DSM method for XCCs */
+static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
+ 0xb8, 0xb4, 0x45, 0x56, 0x2e,
+ 0x8c, 0x5b, 0xec);
+
+#define AMD_XCC_HID_START 3000
+#define AMD_XCC_DSM_GET_NUM_FUNCS 0
+#define AMD_XCC_DSM_GET_SUPP_MODE 1
+#define AMD_XCC_DSM_GET_XCP_MODE 2
+#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4
+#define AMD_XCC_DSM_GET_TMR_INFO 5
+#define AMD_XCC_DSM_NUM_FUNCS 5
+
+#define AMD_XCC_MAX_HID 24
+
+struct xarray numa_info_xa;
+
+/* Encapsulates the XCD acpi object information */
+struct amdgpu_acpi_xcc_info {
+ struct list_head list;
+ struct amdgpu_numa_info *numa_info;
+ uint8_t xcp_node;
+ uint8_t phy_id;
+ acpi_handle handle;
+};
+
+struct amdgpu_acpi_dev_info {
+ struct list_head list;
+ struct list_head xcc_list;
+ uint32_t sbdf;
+ uint16_t supp_xcp_mode;
+ uint16_t xcp_mode;
+ uint16_t mem_mode;
+ uint64_t tmr_base;
+ uint64_t tmr_size;
+};
+
+struct list_head amdgpu_acpi_dev_list;
+
struct amdgpu_atif_notification_cfg {
bool enabled;
int command_code;
@@ -65,18 +107,35 @@ struct amdgpu_atif {
struct amdgpu_atif_notifications notifications;
struct amdgpu_atif_functions functions;
struct amdgpu_atif_notification_cfg notification_cfg;
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
struct backlight_device *bd;
-#endif
struct amdgpu_dm_backlight_caps backlight_caps;
};
+struct amdgpu_atcs_functions {
+ bool get_ext_state;
+ bool pcie_perf_req;
+ bool pcie_dev_rdy;
+ bool pcie_bus_width;
+ bool power_shift_control;
+};
+
+struct amdgpu_atcs {
+ acpi_handle handle;
+
+ struct amdgpu_atcs_functions functions;
+};
+
+static struct amdgpu_acpi_priv {
+ struct amdgpu_atif atif;
+ struct amdgpu_atcs atcs;
+} amdgpu_acpi_priv;
+
/* Call the ATIF method
*/
/**
* amdgpu_atif_call - call an ATIF method
*
- * @handle: acpi handle
+ * @atif: atif structure
* @function: the ATIF function to execute
* @params: ATIF function params
*
@@ -88,6 +147,7 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
struct acpi_buffer *params)
{
acpi_status status;
+ union acpi_object *obj;
union acpi_object atif_arg_elements[2];
struct acpi_object_list atif_arg;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -110,16 +170,24 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
&buffer);
+ obj = (union acpi_object *)buffer.pointer;
- /* Fail only if calling the method fails and ATIF is supported */
- if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ /* Fail if calling the method fails */
+ if (ACPI_FAILURE(status)) {
DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n",
acpi_format_exception(status));
- kfree(buffer.pointer);
+ kfree(obj);
return NULL;
}
- return buffer.pointer;
+ if (obj->type != ACPI_TYPE_BUFFER) {
+ DRM_DEBUG_DRIVER("bad object returned from ATIF: %d\n",
+ obj->type);
+ kfree(obj);
+ return NULL;
+ }
+
+ return obj;
}
/**
@@ -166,7 +234,6 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
/**
* amdgpu_atif_verify_interface - verify ATIF
*
- * @handle: acpi handle
* @atif: amdgpu atif struct
*
* Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function
@@ -208,40 +275,10 @@ out:
return err;
}
-static acpi_handle amdgpu_atif_probe_handle(acpi_handle dhandle)
-{
- acpi_handle handle = NULL;
- char acpi_method_name[255] = { 0 };
- struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name };
- acpi_status status;
-
- /* For PX/HG systems, ATIF and ATPX are in the iGPU's namespace, on dGPU only
- * systems, ATIF is in the dGPU's namespace.
- */
- status = acpi_get_handle(dhandle, "ATIF", &handle);
- if (ACPI_SUCCESS(status))
- goto out;
-
- if (amdgpu_has_atpx()) {
- status = acpi_get_handle(amdgpu_atpx_get_dhandle(), "ATIF",
- &handle);
- if (ACPI_SUCCESS(status))
- goto out;
- }
-
- DRM_DEBUG_DRIVER("No ATIF handle found\n");
- return NULL;
-out:
- acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
- DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name);
- return handle;
-}
-
/**
* amdgpu_atif_get_notification_params - determine notify configuration
*
- * @handle: acpi handle
- * @n: atif notification configuration struct
+ * @atif: acpi handle
*
* Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function
* to determine if a notifier is used and if so which one
@@ -304,7 +341,7 @@ out:
/**
* amdgpu_atif_query_backlight_caps - get min and max backlight input signal
*
- * @handle: acpi handle
+ * @atif: acpi handle
*
* Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
* to determine the acceptable range of backlight values
@@ -355,6 +392,12 @@ static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
characteristics.min_input_signal;
atif->backlight_caps.max_input_signal =
characteristics.max_input_signal;
+ atif->backlight_caps.ac_level = characteristics.ac_level;
+ atif->backlight_caps.dc_level = characteristics.dc_level;
+ atif->backlight_caps.data_points = characteristics.number_of_points;
+ memcpy(atif->backlight_caps.luminance_data,
+ characteristics.data_points,
+ sizeof(atif->backlight_caps.luminance_data));
out:
kfree(info);
return err;
@@ -363,7 +406,7 @@ out:
/**
* amdgpu_atif_get_sbios_requests - get requested sbios event
*
- * @handle: acpi handle
+ * @atif: acpi handle
* @req: atif sbios request struct
*
* Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function
@@ -416,7 +459,7 @@ out:
static int amdgpu_atif_handler(struct amdgpu_device *adev,
struct acpi_bus_event *event)
{
- struct amdgpu_atif *atif = adev->atif;
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
int count;
DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n",
@@ -426,8 +469,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
return NOTIFY_DONE;
/* Is this actually our event? */
- if (!atif ||
- !atif->notification_cfg.enabled ||
+ if (!atif->notification_cfg.enabled ||
event->type != atif->notification_cfg.command_code) {
/* These events will generate keypresses otherwise */
if (event->type == ACPI_VIDEO_NOTIFY_PROBE)
@@ -448,7 +490,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->bd) {
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
@@ -459,7 +500,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
*/
backlight_device_set_brightness(atif->bd, req.backlight_level);
}
-#endif
}
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
@@ -467,7 +507,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
pm_runtime_get_sync(adev_to_drm(adev)->dev);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(adev_to_drm(adev));
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}
}
@@ -487,14 +526,15 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
/**
* amdgpu_atcs_call - call an ATCS method
*
- * @handle: acpi handle
+ * @atcs: atcs structure
* @function: the ATCS function to execute
* @params: ATCS function params
*
* Executes the requested ATCS function (all asics).
* Returns a pointer to the acpi output buffer.
*/
-static union acpi_object *amdgpu_atcs_call(acpi_handle handle, int function,
+static union acpi_object *amdgpu_atcs_call(struct amdgpu_atcs *atcs,
+ int function,
struct acpi_buffer *params)
{
acpi_status status;
@@ -518,7 +558,7 @@ static union acpi_object *amdgpu_atcs_call(acpi_handle handle, int function,
atcs_arg_elements[1].integer.value = 0;
}
- status = acpi_evaluate_object(handle, "ATCS", &atcs_arg, &buffer);
+ status = acpi_evaluate_object(atcs->handle, NULL, &atcs_arg, &buffer);
/* Fail only if calling the method fails and ATIF is supported */
if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
@@ -547,12 +587,12 @@ static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mas
f->pcie_perf_req = mask & ATCS_PCIE_PERFORMANCE_REQUEST_SUPPORTED;
f->pcie_dev_rdy = mask & ATCS_PCIE_DEVICE_READY_NOTIFICATION_SUPPORTED;
f->pcie_bus_width = mask & ATCS_SET_PCIE_BUS_WIDTH_SUPPORTED;
+ f->power_shift_control = mask & ATCS_SET_POWER_SHIFT_CONTROL_SUPPORTED;
}
/**
* amdgpu_atcs_verify_interface - verify ATCS
*
- * @handle: acpi handle
* @atcs: amdgpu atcs struct
*
* Execute the ATCS_FUNCTION_VERIFY_INTERFACE ATCS function
@@ -560,15 +600,14 @@ static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mas
* (all asics).
* returns 0 on success, error on failure.
*/
-static int amdgpu_atcs_verify_interface(acpi_handle handle,
- struct amdgpu_atcs *atcs)
+static int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs)
{
union acpi_object *info;
struct atcs_verify_interface output;
size_t size;
int err = 0;
- info = amdgpu_atcs_call(handle, ATCS_FUNCTION_VERIFY_INTERFACE, NULL);
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_VERIFY_INTERFACE, NULL);
if (!info)
return -EIO;
@@ -605,7 +644,7 @@ out:
*/
bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev)
{
- struct amdgpu_atcs *atcs = &adev->atcs;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
if (atcs->functions.pcie_perf_req && atcs->functions.pcie_dev_rdy)
return true;
@@ -614,6 +653,18 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade
}
/**
+ * amdgpu_acpi_is_power_shift_control_supported
+ *
+ * Check if the ATCS power shift control method
+ * is supported.
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_power_shift_control_supported(void)
+{
+ return amdgpu_acpi_priv.atcs.functions.power_shift_control;
+}
+
+/**
* amdgpu_acpi_pcie_notify_device_ready
*
* @adev: amdgpu_device pointer
@@ -624,19 +675,13 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade
*/
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev)
{
- acpi_handle handle;
union acpi_object *info;
- struct amdgpu_atcs *atcs = &adev->atcs;
-
- /* Get the device handle */
- handle = ACPI_HANDLE(&adev->pdev->dev);
- if (!handle)
- return -EINVAL;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
if (!atcs->functions.pcie_dev_rdy)
return -EINVAL;
- info = amdgpu_atcs_call(handle, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, NULL);
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, NULL);
if (!info)
return -EIO;
@@ -659,9 +704,8 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev)
int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
u8 perf_req, bool advertise)
{
- acpi_handle handle;
union acpi_object *info;
- struct amdgpu_atcs *atcs = &adev->atcs;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
struct atcs_pref_req_input atcs_input;
struct atcs_pref_req_output atcs_output;
struct acpi_buffer params;
@@ -671,17 +715,12 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
if (amdgpu_acpi_pcie_notify_device_ready(adev))
return -EINVAL;
- /* Get the device handle */
- handle = ACPI_HANDLE(&adev->pdev->dev);
- if (!handle)
- return -EINVAL;
-
if (!atcs->functions.pcie_perf_req)
return -EINVAL;
atcs_input.size = sizeof(struct atcs_pref_req_input);
/* client id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
- atcs_input.client_id = adev->pdev->devfn | (adev->pdev->bus->number << 8);
+ atcs_input.client_id = pci_dev_id(adev->pdev);
atcs_input.valid_flags_mask = ATCS_VALID_FLAGS_MASK;
atcs_input.flags = ATCS_WAIT_FOR_COMPLETION;
if (advertise)
@@ -693,7 +732,7 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
params.pointer = &atcs_input;
while (retry--) {
- info = amdgpu_atcs_call(handle, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, &params);
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, &params);
if (!info)
return -EIO;
@@ -727,6 +766,438 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
}
/**
+ * amdgpu_acpi_power_shift_control
+ *
+ * @adev: amdgpu_device pointer
+ * @dev_state: device acpi state
+ * @drv_state: driver state
+ *
+ * Executes the POWER_SHIFT_CONTROL method to
+ * communicate current dGPU device state and
+ * driver state to APU/SBIOS.
+ * returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state)
+{
+ union acpi_object *info;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct atcs_pwr_shift_input atcs_input;
+ struct acpi_buffer params;
+
+ if (!amdgpu_acpi_is_power_shift_control_supported())
+ return -EINVAL;
+
+ atcs_input.size = sizeof(struct atcs_pwr_shift_input);
+ /* dGPU id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
+ atcs_input.dgpu_id = pci_dev_id(adev->pdev);
+ atcs_input.dev_acpi_state = dev_state;
+ atcs_input.drv_state = drv_state;
+
+ params.length = sizeof(struct atcs_pwr_shift_input);
+ params.pointer = &atcs_input;
+
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, &params);
+ if (!info) {
+ DRM_ERROR("ATCS PSC update failed\n");
+ return -EIO;
+ }
+
+ kfree(info);
+ return 0;
+}
+
+/**
+ * amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS
+ *
+ * @adev: amdgpu device pointer
+ * @ss_state: current smart shift event
+ *
+ * returns 0 on success,
+ * otherwise return error number.
+ */
+int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state)
+{
+ int r;
+
+ if (!amdgpu_device_supports_smart_shift(adev))
+ return 0;
+
+ switch (ss_state) {
+ /* SBIOS trigger “stop”, “enable” and “start” at D0, Driver Operational.
+ * SBIOS trigger “stop” at D3, Driver Not Operational.
+ * SBIOS trigger “stop” and “disable” at D0, Driver NOT operational.
+ */
+ case AMDGPU_SS_DRV_LOAD:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_OPR);
+ break;
+ case AMDGPU_SS_DEV_D0:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_OPR);
+ break;
+ case AMDGPU_SS_DEV_D3:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT,
+ AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR);
+ break;
+ case AMDGPU_SS_DRV_UNLOAD:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+#ifdef CONFIG_ACPI_NUMA
+static inline uint64_t amdgpu_acpi_get_numa_size(int nid)
+{
+ /* This is directly using si_meminfo_node implementation as the
+ * function is not exported.
+ */
+ int zone_type;
+ uint64_t managed_pages = 0;
+
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ managed_pages +=
+ zone_managed_pages(&pgdat->node_zones[zone_type]);
+ return managed_pages * PAGE_SIZE;
+}
+
+static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
+{
+ struct amdgpu_numa_info *numa_info;
+ int nid;
+
+ numa_info = xa_load(&numa_info_xa, pxm);
+
+ if (!numa_info) {
+ struct sysinfo info;
+
+ numa_info = kzalloc(sizeof(*numa_info), GFP_KERNEL);
+ if (!numa_info)
+ return NULL;
+
+ nid = pxm_to_node(pxm);
+ numa_info->pxm = pxm;
+ numa_info->nid = nid;
+
+ if (numa_info->nid == NUMA_NO_NODE) {
+ si_meminfo(&info);
+ numa_info->size = info.totalram * info.mem_unit;
+ } else {
+ numa_info->size = amdgpu_acpi_get_numa_size(nid);
+ }
+ xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL);
+ }
+
+ return numa_info;
+}
+#endif
+
+/**
+ * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
+ * acpi device handle
+ *
+ * @handle: acpi handle
+ * @numa_info: amdgpu_numa_info structure holding numa information
+ *
+ * Queries the ACPI interface to fetch the corresponding NUMA Node ID for a
+ * given amdgpu acpi device.
+ *
+ * Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason
+ */
+static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
+ struct amdgpu_numa_info **numa_info)
+{
+#ifdef CONFIG_ACPI_NUMA
+ u64 pxm;
+ acpi_status status;
+
+ if (!numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
+
+ if (ACPI_FAILURE(status))
+ return status;
+
+ *numa_info = amdgpu_acpi_get_numa_info(pxm);
+
+ if (!*numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ return_ACPI_STATUS(AE_OK);
+#else
+ return_ACPI_STATUS(AE_NOT_EXIST);
+#endif
+}
+
+static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u32 sbdf)
+{
+ struct amdgpu_acpi_dev_info *acpi_dev;
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return NULL;
+
+ list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
+ if (acpi_dev->sbdf == sbdf)
+ return acpi_dev;
+
+ return NULL;
+}
+
+static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
+ struct amdgpu_acpi_xcc_info *xcc_info, u32 sbdf)
+{
+ struct amdgpu_acpi_dev_info *tmp;
+ union acpi_object *obj;
+ int ret = -ENOENT;
+
+ *dev_info = NULL;
+ tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmp->xcc_list);
+ INIT_LIST_HEAD(&tmp->list);
+ tmp->sbdf = sbdf;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_SUPP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_SUPP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->supp_xcp_mode = obj->integer.value & 0xFFFF;
+ ACPI_FREE(obj);
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_XCP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_XCP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->xcp_mode = obj->integer.value & 0xFFFF;
+ tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_TMR_INFO, NULL,
+ ACPI_TYPE_PACKAGE);
+
+ if (!obj || obj->package.count < 2) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_TMR_INFO);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->tmr_base = obj->package.elements[0].integer.value;
+ tmp->tmr_size = obj->package.elements[1].integer.value;
+ ACPI_FREE(obj);
+
+ DRM_DEBUG_DRIVER(
+ "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
+ tmp->sbdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
+ tmp->tmr_base, tmp->tmr_size);
+ list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
+ *dev_info = tmp;
+
+ return 0;
+
+out:
+ if (obj)
+ ACPI_FREE(obj);
+ kfree(tmp);
+
+ return ret;
+}
+
+static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
+ u32 *sbdf)
+{
+ union acpi_object *obj;
+ acpi_status status;
+ int ret = -ENOENT;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_NUM_FUNCS, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS)
+ goto out;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* PF xcc id [39:32] */
+ xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
+ /* xcp node of this xcc [47:40] */
+ xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
+ /* PF domain of this xcc [31:16] */
+ *sbdf = (obj->integer.value) & 0xFFFF0000;
+ /* PF bus/dev/fn of this xcc [63:48] */
+ *sbdf |= (obj->integer.value >> 48) & 0xFFFF;
+ ACPI_FREE(obj);
+ obj = NULL;
+
+ status =
+ amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info);
+
+ /* TODO: check if this check is required */
+ if (ACPI_SUCCESS(status))
+ ret = 0;
+out:
+ if (obj)
+ ACPI_FREE(obj);
+
+ return ret;
+}
+
+static int amdgpu_acpi_enumerate_xcc(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info = NULL;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ struct acpi_device *acpi_dev;
+ char hid[ACPI_ID_LEN];
+ int ret, id;
+ u32 sbdf;
+
+ INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
+ xa_init(&numa_info_xa);
+
+ for (id = 0; id < AMD_XCC_MAX_HID; id++) {
+ sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id);
+ acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1);
+ /* These ACPI objects are expected to be in sequential order. If
+ * one is not found, no need to check the rest.
+ */
+ if (!acpi_dev) {
+ DRM_DEBUG_DRIVER("No matching acpi device found for %s",
+ hid);
+ break;
+ }
+
+ xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info),
+ GFP_KERNEL);
+ if (!xcc_info) {
+ DRM_ERROR("Failed to allocate memory for xcc info\n");
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&xcc_info->list);
+ xcc_info->handle = acpi_device_handle(acpi_dev);
+ acpi_dev_put(acpi_dev);
+
+ ret = amdgpu_acpi_get_xcc_info(xcc_info, &sbdf);
+ if (ret) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+
+ if (!dev_info)
+ ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, sbdf);
+
+ if (ret == -ENOMEM)
+ return ret;
+
+ if (!dev_info) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ list_add_tail(&xcc_info->list, &dev_info->xcc_list);
+ }
+
+ return 0;
+}
+
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ u32 sbdf;
+
+ if (!tmr_offset || !tmr_size)
+ return -EINVAL;
+
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ *tmr_offset = dev_info->tmr_base;
+ *tmr_size = dev_info->tmr_size;
+
+ return 0;
+}
+
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ u32 sbdf;
+
+ if (!numa_info)
+ return -EINVAL;
+
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ list_for_each_entry(xcc_info, &dev_info->xcc_list, list) {
+ if (xcc_info->phy_id == xcc_id) {
+ memcpy(numa_info, xcc_info->numa_info,
+ sizeof(*numa_info));
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/**
* amdgpu_acpi_event - handle notify events
*
* @nb: notifier block
@@ -769,50 +1240,15 @@ static int amdgpu_acpi_event(struct notifier_block *nb,
*/
int amdgpu_acpi_init(struct amdgpu_device *adev)
{
- acpi_handle handle, atif_handle;
- struct amdgpu_atif *atif;
- struct amdgpu_atcs *atcs = &adev->atcs;
- int ret;
-
- /* Get the device handle */
- handle = ACPI_HANDLE(&adev->pdev->dev);
-
- if (!adev->bios || !handle)
- return 0;
-
- /* Call the ATCS method */
- ret = amdgpu_atcs_verify_interface(handle, atcs);
- if (ret) {
- DRM_DEBUG_DRIVER("Call to ATCS verify_interface failed: %d\n", ret);
- }
-
- /* Probe for ATIF, and initialize it if found */
- atif_handle = amdgpu_atif_probe_handle(handle);
- if (!atif_handle)
- goto out;
-
- atif = kzalloc(sizeof(*atif), GFP_KERNEL);
- if (!atif) {
- DRM_WARN("Not enough memory to initialize ATIF\n");
- goto out;
- }
- atif->handle = atif_handle;
-
- /* Call the ATIF method */
- ret = amdgpu_atif_verify_interface(atif);
- if (ret) {
- DRM_DEBUG_DRIVER("Call to ATIF verify_interface failed: %d\n", ret);
- kfree(atif);
- goto out;
- }
- adev->atif = atif;
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->notifications.brightness_change) {
- if (amdgpu_device_has_dc_support(adev)) {
+ if (adev->dc_enabled) {
#if defined(CONFIG_DRM_AMD_DC)
struct amdgpu_display_manager *dm = &adev->dm;
- atif->bd = dm->backlight_dev;
+
+ if (dm->backlight_dev[0])
+ atif->bd = dm->backlight_dev[0];
#endif
} else {
struct drm_encoder *tmp;
@@ -825,6 +1261,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
enc->enc_priv) {
struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
+
if (dig->bl_dev) {
atif->bd = dig->bl_dev;
break;
@@ -833,7 +1270,152 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
}
}
}
+ adev->acpi_nb.notifier_call = amdgpu_acpi_event;
+ register_acpi_notifier(&adev->acpi_nb);
+
+ return 0;
+}
+
+void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+
+ memcpy(caps, &atif->backlight_caps, sizeof(*caps));
+}
+
+/**
+ * amdgpu_acpi_fini - tear down driver acpi support
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Unregisters with the acpi notifier chain (all asics).
+ */
+void amdgpu_acpi_fini(struct amdgpu_device *adev)
+{
+ unregister_acpi_notifier(&adev->acpi_nb);
+}
+
+/**
+ * amdgpu_atif_pci_probe_handle - look up the ATIF handle
+ *
+ * @pdev: pci device
+ *
+ * Look up the ATIF handles (all asics).
+ * Returns true if the handle is found, false if not.
+ */
+static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev)
+{
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+ acpi_handle dhandle, atif_handle;
+ acpi_status status;
+ int ret;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ return false;
+
+ status = acpi_get_handle(dhandle, "ATIF", &atif_handle);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ amdgpu_acpi_priv.atif.handle = atif_handle;
+ acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer);
+ DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name);
+ ret = amdgpu_atif_verify_interface(&amdgpu_acpi_priv.atif);
+ if (ret) {
+ amdgpu_acpi_priv.atif.handle = 0;
+ return false;
+ }
+ return true;
+}
+
+/**
+ * amdgpu_atcs_pci_probe_handle - look up the ATCS handle
+ *
+ * @pdev: pci device
+ *
+ * Look up the ATCS handles (all asics).
+ * Returns true if the handle is found, false if not.
+ */
+static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
+{
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name };
+ acpi_handle dhandle, atcs_handle;
+ acpi_status status;
+ int ret;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ return false;
+
+ status = acpi_get_handle(dhandle, "ATCS", &atcs_handle);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ amdgpu_acpi_priv.atcs.handle = atcs_handle;
+ acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer);
+ DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name);
+ ret = amdgpu_atcs_verify_interface(&amdgpu_acpi_priv.atcs);
+ if (ret) {
+ amdgpu_acpi_priv.atcs.handle = 0;
+ return false;
+ }
+ return true;
+}
+
+
+/**
+ * amdgpu_acpi_should_gpu_reset
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * returns true if should reset GPU, false if not
+ */
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+{
+ if ((adev->flags & AMD_IS_APU) &&
+ adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */
+ return false;
+
+ if ((adev->flags & AMD_IS_APU) &&
+ amdgpu_acpi_is_s3_active(adev))
+ return false;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+ return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+#else
+ return true;
#endif
+}
+
+/*
+ * amdgpu_acpi_detect - detect ACPI ATIF/ATCS methods
+ *
+ * Check if we have the ATIF/ATCS methods and populate
+ * the structures in the driver.
+ */
+void amdgpu_acpi_detect(void)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct pci_dev *pdev = NULL;
+ int ret;
+
+ while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
+ if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) &&
+ (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8))
+ continue;
+
+ if (!atif->handle)
+ amdgpu_atif_pci_probe_handle(pdev);
+ if (!atcs->handle)
+ amdgpu_atcs_pci_probe_handle(pdev);
+ }
if (atif->functions.sbios_requests && !atif->functions.system_params) {
/* XXX check this workraround, if sbios request function is
@@ -864,50 +1446,120 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
atif->backlight_caps.caps_valid = false;
}
-out:
- adev->acpi_nb.notifier_call = amdgpu_acpi_event;
- register_acpi_notifier(&adev->acpi_nb);
-
- return ret;
+ amdgpu_acpi_enumerate_xcc();
}
-void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
- struct amdgpu_dm_backlight_caps *caps)
+void amdgpu_acpi_release(void)
{
- if (!adev->atif) {
- caps->caps_valid = false;
+ struct amdgpu_acpi_dev_info *dev_info, *dev_tmp;
+ struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp;
+ struct amdgpu_numa_info *numa_info;
+ unsigned long index;
+
+ xa_for_each(&numa_info_xa, index, numa_info) {
+ kfree(numa_info);
+ xa_erase(&numa_info_xa, index);
+ }
+
+ if (list_empty(&amdgpu_acpi_dev_list))
return;
+
+ list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list,
+ list) {
+ list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list,
+ list) {
+ list_del(&xcc_info->list);
+ kfree(xcc_info);
+ }
+
+ list_del(&dev_info->list);
+ kfree(dev_info);
}
- caps->caps_valid = adev->atif->backlight_caps.caps_valid;
- caps->min_input_signal = adev->atif->backlight_caps.min_input_signal;
- caps->max_input_signal = adev->atif->backlight_caps.max_input_signal;
}
+#if IS_ENABLED(CONFIG_SUSPEND)
/**
- * amdgpu_acpi_fini - tear down driver acpi support
+ * amdgpu_acpi_is_s3_active
*
- * @adev: amdgpu_device pointer
+ * @adev: amdgpu_device_pointer
*
- * Unregisters with the acpi notifier chain (all asics).
+ * returns true if supported, false if not.
*/
-void amdgpu_acpi_fini(struct amdgpu_device *adev)
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
{
- unregister_acpi_notifier(&adev->acpi_nb);
- kfree(adev->atif);
+ return !(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state == PM_SUSPEND_MEM);
}
/**
- * amdgpu_acpi_is_s0ix_supported
+ * amdgpu_acpi_is_s0ix_active
+ *
+ * @adev: amdgpu_device_pointer
*
* returns true if supported, false if not.
*/
-bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
{
-#if defined(CONFIG_AMD_PMC)
- if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
- if (adev->flags & AMD_IS_APU)
- return true;
+ if (!(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+ return false;
+
+ if (adev->asic_type < CHIP_RAVEN)
+ return false;
+
+ if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
+ return false;
+
+ /*
+ * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally
+ * risky to do any special firmware-related preparations for entering
+ * S0ix even though the system is suspending to idle, so return false
+ * in that case.
+ */
+ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+ dev_err_once(adev->dev,
+ "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
+ "To use suspend-to-idle change the sleep mode in BIOS setup.\n");
+ return false;
}
-#endif
+
+#if !IS_ENABLED(CONFIG_AMD_PMC)
+ dev_err_once(adev->dev,
+ "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
return false;
+#else
+ return true;
+#endif /* CONFIG_AMD_PMC */
+}
+#endif /* CONFIG_SUSPEND */
+
+#if IS_ENABLED(CONFIG_DRM_AMD_ISP)
+static const struct acpi_device_id isp_sensor_ids[] = {
+ { "OMNI5C10" },
+ { }
+};
+
+static int isp_match_acpi_device_ids(struct device *dev, const void *data)
+{
+ return acpi_match_device(data, dev) ? 1 : 0;
+}
+
+int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev)
+{
+ struct device *pdev __free(put_device) = NULL;
+ struct acpi_device *acpi_pdev;
+
+ pdev = bus_find_device(&platform_bus_type, NULL, isp_sensor_ids,
+ isp_match_acpi_device_ids);
+ if (!pdev)
+ return -EINVAL;
+
+ acpi_pdev = ACPI_COMPANION(pdev);
+ if (!acpi_pdev)
+ return -ENODEV;
+
+ *dev = acpi_pdev;
+
+ return 0;
}
+#endif /* CONFIG_DRM_AMD_ISP */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
index a4d65973bf7c..80771b1480ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -100,6 +100,7 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
amdgpu_afmt_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000);
amdgpu_afmt_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100);
amdgpu_afmt_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000);
+ res.clock = clock;
return res;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c5343a5eecbe..a2879d2b7c8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
@@ -21,15 +22,20 @@
*/
#include "amdgpu_amdkfd.h"
+#include "amd_pcie.h"
#include "amd_shared.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
+#include <drm/ttm/ttm_tt.h>
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@@ -44,11 +50,10 @@ int amdgpu_amdkfd_init(void)
int ret;
si_meminfo(&si);
- amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
+ amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
ret = kgd2kfd_init();
- amdgpu_amdkfd_gpuvm_init_mem_limits();
kfd_initialized = !ret;
return ret;
@@ -69,11 +74,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
if (!kfd_initialized)
return;
- adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
- adev->pdev, adev->asic_type, vf);
-
- if (adev->kfd.dev)
- amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+ adev->kfd.dev = kgd2kfd_probe(adev, vf);
}
/**
@@ -95,13 +96,24 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
size_t *start_offset)
{
/*
- * The first num_doorbells are used by amdgpu.
+ * The first num_kernel_doorbells are used by amdgpu.
* amdkfd takes whatever's left in the aperture.
*/
- if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
+ if (adev->enable_mes) {
+ /*
+ * With MES enabled, we only need to initialize
+ * the base address. The size and offset are
+ * not initialized as AMDGPU manages the whole
+ * doorbell space.
+ */
+ *aperture_base = adev->doorbell.base;
+ *aperture_size = 0;
+ *start_offset = 0;
+ } else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
+ sizeof(u32)) {
*aperture_base = adev->doorbell.base;
*aperture_size = adev->doorbell.size;
- *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+ *start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
} else {
*aperture_base = 0;
*aperture_size = 0;
@@ -109,11 +121,57 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
}
}
+
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ kfd.reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = adev->enable_mes ?
+ AMDGPU_RESET_SRC_MES :
+ AMDGPU_RESET_SRC_HWS;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
+static const struct drm_client_funcs kfd_client_funcs = {
+ .unregister = drm_client_release,
+};
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!adev->kfd.init_complete || adev->kfd.client.dev)
+ return 0;
+
+ ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+ &kfd_client_funcs);
+ if (ret) {
+ dev_err(adev->dev, "Failed to init DRM client: %d\n",
+ ret);
+ return ret;
+ }
+
+ drm_client_register(&adev->kfd.client);
+
+ return 0;
+}
+
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
int last_valid_bit;
+ amdgpu_amdkfd_gpuvm_init_mem_limits();
+
if (adev->kfd.dev) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap =
@@ -126,15 +184,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev_to_drm(adev)->render->index,
.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
-
+ .enable_mes = adev->enable_mes,
};
/* this is going to have a few of the MSBs set that we need to
* clear
*/
bitmap_complement(gpu_resources.cp_queue_bitmap,
- adev->gfx.mec.queue_bitmap,
- KGD_MAX_QUEUES);
+ adev->gfx.mec_bitmap[0].queue_bitmap,
+ AMDGPU_MAX_QUEUES);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant
@@ -142,7 +200,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
- for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+ for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i)
clear_bit(i, gpu_resources.cp_queue_bitmap);
amdgpu_doorbell_get_kfd_info(adev,
@@ -165,15 +223,21 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->doorbell_index.last_non_cp;
}
- kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources);
+ adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
+ &gpu_resources);
+
+ amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+
+ INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
}
}
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
{
if (adev->kfd.dev) {
kgd2kfd_device_exit(adev->kfd.dev);
adev->kfd.dev = NULL;
+ amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
}
}
@@ -184,28 +248,53 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
+{
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
+ else
+ kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
+ }
+}
+
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
+{
+ int r = 0;
+
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
+ else
+ r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
+ }
+
+ return r;
+}
+
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev)
{
if (adev->kfd.dev)
- kgd2kfd_suspend(adev->kfd.dev, run_pm);
+ kgd2kfd_suspend_process(adev->kfd.dev);
}
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev)
{
int r = 0;
if (adev->kfd.dev)
- r = kgd2kfd_resume(adev->kfd.dev, run_pm);
+ r = kgd2kfd_resume_process(adev->kfd.dev);
return r;
}
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
{
int r = 0;
if (adev->kfd.dev)
- r = kgd2kfd_pre_reset(adev->kfd.dev);
+ r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);
return r;
}
@@ -220,19 +309,17 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
return r;
}
-void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
+void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_device_gpu_recover(adev, NULL);
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work);
}
-int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool cp_mqd_gfx9)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
struct amdgpu_bo_param bp;
int r;
@@ -245,6 +332,7 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
if (cp_mqd_gfx9)
bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
@@ -300,22 +388,25 @@ allocate_mem_reserve_bo_failed:
return r;
}
-void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
{
- struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
+ struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
- amdgpu_bo_reserve(bo, true);
- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unpin(bo);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&(bo));
+ if (!bo || !*bo)
+ return;
+
+ (void)amdgpu_bo_reserve(*bo, true);
+ amdgpu_bo_kunmap(*bo);
+ amdgpu_bo_unpin(*bo);
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
}
-int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
+int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
void **mem_obj)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_user *ubo;
struct amdgpu_bo_param bp;
int r;
@@ -326,30 +417,30 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
- r = amdgpu_bo_create(adev, &bp, &bo);
+ r = amdgpu_bo_create_user(adev, &bp, &ubo);
if (r) {
dev_err(adev->dev,
"failed to allocate gws BO for amdkfd (%d)\n", r);
return r;
}
+ bo = &ubo->bo;
*mem_obj = bo;
return 0;
}
-void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
+void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
amdgpu_bo_unref(&bo);
}
-uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
+uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
enum kgd_engine_type type)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
switch (type) {
case KGD_ENGINE_PFP:
return adev->gfx.pfp_fw_version;
@@ -382,17 +473,27 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
return 0;
}
-void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
- struct kfd_local_mem_info *mem_info)
+void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
memset(mem_info, 0, sizeof(*mem_info));
- mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
- mem_info->local_mem_size_private = adev->gmc.real_vram_size -
+ if (xcp) {
+ if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
+ mem_info->local_mem_size_public =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ else
+ mem_info->local_mem_size_private =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ } else if (adev->apu_prefer_gtt) {
+ mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
+ mem_info->local_mem_size_private = 0;
+ } else {
+ mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
+ mem_info->local_mem_size_private = adev->gmc.real_vram_size -
adev->gmc.visible_vram_size;
-
+ }
mem_info->vram_width = adev->gmc.vram_width;
pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
@@ -400,9 +501,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
mem_info->local_mem_size_public,
mem_info->local_mem_size_private);
- if (amdgpu_sriov_vf(adev))
- mem_info->mem_clk_max = adev->clock.default_mclk / 100;
- else if (adev->pm.dpm_enabled) {
+ if (adev->pm.dpm_enabled) {
if (amdgpu_emu_mode == 1)
mem_info->mem_clk_max = 0;
else
@@ -411,58 +510,28 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
mem_info->mem_clk_max = 100;
}
-uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
if (adev->gfx.funcs->get_gpu_clock_counter)
return adev->gfx.funcs->get_gpu_clock_counter(adev);
return 0;
}
-uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
/* the sclk is in quantas of 10kHz */
- if (amdgpu_sriov_vf(adev))
- return adev->clock.default_sclk / 100;
- else if (adev->pm.dpm_enabled)
+ if (adev->pm.dpm_enabled)
return amdgpu_dpm_get_sclk(adev, false) / 100;
else
return 100;
}
-void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
-
- memset(cu_info, 0, sizeof(*cu_info));
- if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
- return;
-
- cu_info->cu_active_number = acu_info.number;
- cu_info->cu_ao_mask = acu_info.ao_cu_mask;
- memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
- sizeof(acu_info.bitmap));
- cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
- cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
- cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
- cu_info->simd_per_cu = acu_info.simd_per_cu;
- cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
- cu_info->wave_front_size = acu_info.wave_front_size;
- cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
- cu_info->lds_size = acu_info.lds_size;
-}
-
-int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
- struct kgd_dev **dma_buf_kgd,
+int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
- uint32_t *flags)
+ uint32_t *flags, int8_t *xcp_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct dma_buf *dma_buf;
struct drm_gem_object *obj;
struct amdgpu_bo *bo;
@@ -490,12 +559,10 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
goto out_put;
r = 0;
- if (dma_buf_kgd)
- *dma_buf_kgd = (struct kgd_dev *)adev;
+ if (dmabuf_adev)
+ *dmabuf_adev = adev;
if (bo_size)
*bo_size = amdgpu_bo_size(bo);
- if (metadata_size)
- *metadata_size = bo->metadata_size;
if (metadata_buffer)
r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
metadata_size, &metadata_flags);
@@ -507,82 +574,76 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
}
+ if (xcp_id)
+ *xcp_id = bo->xcp_id;
out_put:
dma_buf_put(dma_buf);
return r;
}
-uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
-
- return amdgpu_vram_mgr_usage(vram_man);
-}
-
-uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->gmc.xgmi.hive_id;
-}
-
-uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->unique_id;
-}
-
-uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
- struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
- struct amdgpu_device *adev = (struct amdgpu_device *)dst;
- int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+ int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
+ fls(adev->pm.pcie_mlw_mask)) - 1;
+ int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
+ CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
+ fls(adev->pm.pcie_gen_mask &
+ CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
+ uint32_t num_lanes_mask = 1 << num_lanes_shift;
+ uint32_t gen_speed_mask = 1 << gen_speed_shift;
+ int num_lanes_factor = 0, gen_speed_mbits_factor = 0;
- if (ret < 0) {
- DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
- adev->gmc.xgmi.physical_node_id,
- peer_adev->gmc.xgmi.physical_node_id, ret);
- ret = 0;
+ switch (num_lanes_mask) {
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
+ num_lanes_factor = 1;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
+ num_lanes_factor = 2;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
+ num_lanes_factor = 4;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
+ num_lanes_factor = 8;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
+ num_lanes_factor = 12;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
+ num_lanes_factor = 16;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
+ num_lanes_factor = 32;
+ break;
}
- return (uint8_t)ret;
-}
-
-uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->rmmio_remap.bus_addr;
-}
-uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->gds.gws_size;
-}
-
-uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->rev_id;
-}
-
-int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ switch (gen_speed_mask) {
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
+ gen_speed_mbits_factor = 2500;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
+ gen_speed_mbits_factor = 5000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
+ gen_speed_mbits_factor = 8000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
+ gen_speed_mbits_factor = 16000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
+ gen_speed_mbits_factor = 32000;
+ break;
+ }
- return adev->gmc.noretry;
+ return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
}
-int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
+ enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct amdgpu_ring *ring;
@@ -605,7 +666,7 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
goto err;
}
- ret = amdgpu_job_alloc(adev, 1, &job, NULL);
+ ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0);
if (ret)
goto err;
@@ -617,6 +678,7 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
ib->length_dw = ib_len;
/* This works for NO_HWS. TODO: need to handle without knowing VMID */
job->vmid = vmid;
+ job->num_ibs = 1;
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
@@ -625,25 +687,32 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
goto err_ib_sched;
}
+ /* Drop the initial kref_init count (see drm_sched_main as example) */
+ dma_fence_put(f);
ret = dma_fence_wait(f, false);
err_ib_sched:
- dma_fence_put(f);
amdgpu_job_free(job);
err:
return ret;
}
-void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
+void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- /* Temp workaround to fix the soft hang observed in certain compute
- * applications if GFXOFF is enabled.
- */
- if (adev->asic_type == CHIP_SIENNA_CICHLID) {
+ enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
+ if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+ ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
+ (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
amdgpu_gfx_off_ctrl(adev, idle);
+ } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
+ (adev->flags & AMD_IS_APU)) {
+ /* Disable GFXOFF and PG. Temporary workaround
+ * to fix some compute applications issue on GFX9.
+ */
+ struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (gfx_block != NULL)
+ gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
}
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE,
@@ -658,37 +727,186 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
+{
+ return adev->have_atomics_support;
+}
+
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
+{
+ amdgpu_device_flush_hdp(adev, NULL);
+}
+
+bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
+{
+ return amdgpu_ras_get_fed_status(adev);
+}
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
+}
+
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset)
+{
+ amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
+}
+
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload)
+{
+ int ret;
+
+ /* Device or IH ring is not ready so bail. */
+ ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
+ if (ret)
+ return ret;
+
+ /* Send payload to fence KFD interrupts */
+ amdgpu_amdkfd_interrupt(adev, payload);
+
+ return 0;
+}
+
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
+{
+ return kgd2kfd_check_and_lock_kfd(adev->kfd.dev);
+}
+
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
+{
+ kgd2kfd_unlock_kfd(adev->kfd.dev);
+}
+
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
+ u64 tmp;
- if (adev->family == AMDGPU_FAMILY_AI) {
- int i;
+ if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
+ if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) {
+ /* In NPS1 mode, we should restrict the vram reporting
+ * tied to the ttm_pages_limit which is 1/2 of the system
+ * memory. For other partition modes, the HBM is uniformly
+ * divided already per numa node reported. If user wants to
+ * go beyond the default ttm limit and maximize the ROCm
+ * allocations, they can go up to max ttm and sysmem limits.
+ */
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+ tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes();
+ } else {
+ tmp = adev->gmc.mem_partitions[mem_id].size;
+ }
+ do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
+ return ALIGN_DOWN(tmp, PAGE_SIZE);
+ } else if (adev->apu_prefer_gtt) {
+ return (ttm_tt_pages_limit() << PAGE_SHIFT);
} else {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+ return adev->gmc.real_vram_size;
}
+}
- return 0;
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+ u32 inst)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ struct amdgpu_ring_funcs *ring_funcs;
+ struct amdgpu_ring *ring;
+ int r = 0;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
+ if (!ring_funcs)
+ return -ENOMEM;
+
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring) {
+ r = -ENOMEM;
+ goto free_ring_funcs;
+ }
+
+ ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE;
+ ring->doorbell_index = doorbell_off;
+ ring->funcs = ring_funcs;
+
+ spin_lock(&kiq->ring_lock);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock(&kiq->ring_lock);
+ r = -ENOMEM;
+ goto free_ring;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
+
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+
+ spin_unlock(&kiq->ring_lock);
+
+free_ring:
+ kfree(ring);
+
+free_ring_funcs:
+ kfree(ring_funcs);
+
+ return r;
}
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+/* Stop scheduling on KFD */
+int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- const uint32_t flush_type = 0;
- bool all_hub = false;
+ if (!adev->kfd.init_complete)
+ return 0;
- if (adev->family == AMDGPU_FAMILY_AI)
- all_hub = true;
+ return kgd2kfd_stop_sched(adev->kfd.dev, node_id);
+}
- return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+/* Start scheduling on KFD */
+int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ return kgd2kfd_start_sched(adev->kfd.dev, node_id);
}
-bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
+/* check if there are KFD queues active */
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ if (!adev->kfd.init_complete)
+ return false;
- return adev->have_atomics_support;
+ return kgd2kfd_compute_active(adev->kfd.dev, node_id);
+}
+
+/* Config CGTT_SQ_CLK_CTRL */
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable)
+{
+ int r;
+
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ r = psp_config_sq_perfmon(&adev->psp, xcp_id, core_override_enable,
+ reg_override_enable, perfmon_override_enable);
+
+ return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a81d9cacf9b8..8bdfcde2029b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -25,24 +25,44 @@
#ifndef AMDGPU_AMDKFD_H_INCLUDED
#define AMDGPU_AMDKFD_H_INCLUDED
+#include <linux/list.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
+#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
#include <kgd_kfd_interface.h>
-#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/drm_client.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
+#include "amdgpu_xcp.h"
extern uint64_t amdgpu_amdkfd_total_mem_size;
+enum TLB_FLUSH_TYPE {
+ TLB_FLUSH_LEGACY = 0,
+ TLB_FLUSH_LIGHTWEIGHT,
+ TLB_FLUSH_HEAVYWEIGHT
+};
+
struct amdgpu_device;
+struct kfd_process_device;
+struct amdgpu_reset_context;
+
+enum kfd_mem_attachment_type {
+ KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
+ KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */
+ KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */
+ KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */
+};
-struct kfd_bo_va_list {
- struct list_head bo_list;
- struct amdgpu_bo_va *bo_va;
- void *kgd_dev;
+struct kfd_mem_attachment {
+ struct list_head list;
+ enum kfd_mem_attachment_type type;
bool is_mapped;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_device *adev;
uint64_t va;
uint64_t pte_flags;
};
@@ -50,21 +70,23 @@ struct kfd_bo_va_list {
struct kgd_mem {
struct mutex lock;
struct amdgpu_bo *bo;
- struct list_head bo_va_list;
+ struct dma_buf *dmabuf;
+ struct amdgpu_hmm_range *range;
+ struct list_head attachments;
/* protected by amdkfd_process_info.lock */
- struct ttm_validate_buffer validate_list;
- struct ttm_validate_buffer resv_list;
+ struct list_head validate_list;
uint32_t domain;
unsigned int mapped_to_gpu_memory;
uint64_t va;
uint32_t alloc_flags;
- atomic_t invalid;
+ uint32_t invalid;
struct amdkfd_process_info *process_info;
struct amdgpu_sync sync;
+ uint32_t gem_handle;
bool aql_queue;
bool is_imported;
};
@@ -75,11 +97,23 @@ struct amdgpu_amdkfd_fence {
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
+ struct svm_range_bo *svm_bo;
};
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
- uint64_t vram_used;
+ int64_t vram_used[MAX_XCP];
+ uint64_t vram_used_aligned[MAX_XCP];
+ bool init_complete;
+ struct work_struct reset_work;
+
+ /* Client for KFD BO GEM handle allocations */
+ struct drm_client_dev client;
+
+ /* HMM page migration MEMORY_DEVICE_PRIVATE mapping
+ * Must be last --ends in a flexible-array member.
+ */
+ struct dev_pagemap pgmap;
};
enum kgd_engine_type {
@@ -112,47 +146,63 @@ struct amdkfd_process_info {
struct amdgpu_amdkfd_fence *eviction_fence;
/* MMU-notifier related fields */
- atomic_t evicted_bos;
+ struct mutex notifier_lock;
+ uint32_t evicted_bos;
struct delayed_work restore_userptr_work;
struct pid *pid;
+ bool block_mmu_notifications;
};
int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc);
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev);
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry);
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
-int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
+int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
+ enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
-void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
-bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
+void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
-void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
+void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev);
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
int queue_bit);
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
- struct mm_struct *mm);
+ struct mm_struct *mm,
+ struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
+#if defined(CONFIG_DEBUG_FS)
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
+#endif
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem);
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence);
#else
static inline
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
@@ -167,47 +217,58 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
}
static inline
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- return 0;
}
static inline
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
+{
+ return 0;
+}
+static inline
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
{
return 0;
}
#endif
/* Shared API */
-int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
-void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
-int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
-void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj);
+int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
+ void **mem_obj);
+void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
-uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
+uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
enum kgd_engine_type type);
-void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
- struct kfd_local_mem_info *mem_info);
-uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
-
-uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
-void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
-int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
- struct kgd_dev **dmabuf_kgd,
+void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp);
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
+
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
+int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
- uint32_t *flags);
-uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
-uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
-uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd);
-uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
-uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
-uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd);
-int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd);
-uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
+ uint32_t *flags, int8_t *xcp_id);
+int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload);
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+ u32 inst);
+int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id);
+
/* Read user wptr from a specified user address space with page fault
* disabled. The memory must be pinned and mapped to the hardware when
@@ -233,46 +294,87 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
})
/* GPUVM API */
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
- void **vm, void **process_info,
- struct dma_fence **ef);
-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp, u32 pasid,
- void **vm, void **process_info,
+#define drm_priv_to_vm(drm_priv) \
+ (&((struct amdgpu_fpriv *) \
+ ((struct drm_file *)(drm_priv))->driver_priv)->vm)
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm,
+ void **process_info,
struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
-uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- struct kgd_dev *kgd, uint64_t va, uint64_t size,
- void *vm, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags);
+ struct amdgpu_device *adev, uint64_t va, uint64_t size,
+ void *drm_priv, struct kgd_mem **mem,
+ uint64_t *offset, uint32_t flags, bool criu_resume);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
-int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
+ uint64_t *size);
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
+ struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
- struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
- struct kgd_mem *mem, void **kptr, uint64_t *size);
+ struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size);
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
+
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart);
+
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
- struct dma_fence **ef);
-int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+ struct dma_fence __rcu **ef);
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
struct kfd_vm_fault_info *info);
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
- struct dma_buf *dmabuf,
- uint64_t va, void *vm,
- struct kgd_mem **mem, uint64_t *size,
- uint64_t *mmap_offset);
-int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+ struct dma_buf **dmabuf);
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config);
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
+bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem);
+void amdgpu_amdkfd_block_mmu_notifications(void *p);
+int amdgpu_amdkfd_criu_resume(void *p);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
+
+#define KFD_XCP_MEM_ID(adev, xcp_id) \
+ ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
+ (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
+
+#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id))
+
+
#if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+
+/**
+ * @amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
+ *
+ * Allows KFD to release its resources associated with the GEM object.
+ */
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
#else
static inline
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
@@ -286,31 +388,53 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
}
static inline
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
{
}
#endif
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev);
+#else
+static inline
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
+{
+ return 0;
+}
+#endif
+
/* KGD2KFD callbacks */
-int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
#if IS_ENABLED(CONFIG_HSA_AMD)
int kgd2kfd_init(void);
void kgd2kfd_exit(void);
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
- unsigned int asic_type, bool vf);
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
- struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_pre_reset(struct kfd_dev *kfd);
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc);
+int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc);
+void kgd2kfd_suspend_process(struct kfd_dev *kfd);
+int kgd2kfd_resume_process(struct kfd_dev *kfd);
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
+int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd);
+void kgd2kfd_unlock_kfd(struct kfd_dev *kfd);
+int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd);
+int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault);
+
#else
static inline int kgd2kfd_init(void)
{
@@ -322,14 +446,13 @@ static inline void kgd2kfd_exit(void)
}
static inline
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
- unsigned int asic_type, bool vf)
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
return NULL;
}
static inline
-bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev,
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources)
{
return false;
@@ -339,16 +462,26 @@ static inline void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
}
-static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc)
+{
+}
+
+static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc)
+{
+ return 0;
+}
+
+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd)
{
}
-static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd)
{
return 0;
}
-static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
{
return 0;
}
@@ -369,8 +502,49 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
}
static inline
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
+{
+}
+
+static inline int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
+{
+}
+
+static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
{
+ return 0;
+}
+
+static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
}
+
+static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return false;
+}
+
+static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ return false;
+}
+
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
new file mode 100644
index 000000000000..7e9f7a280c1b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
+#include "gc/gc_9_4_2_offset.h"
+#include "gc/gc_9_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_RPLACE. */
+static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_aldebaran_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+const struct kfd2kgd_calls aldebaran_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+ .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
new file mode 100644
index 000000000000..a7bdaf8d82dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 604757a1e440..1105a09e55dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -20,11 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/module.h>
-#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
+#include "amdgpu_reset.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
@@ -47,6 +48,8 @@
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -56,11 +59,6 @@
(*dump)[i++][1] = RREG32(addr); \
} while (0)
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
return (struct v9_sdma_mqd *)mqd;
@@ -122,10 +120,9 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
return sdma_rlc_reg_offset;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -192,18 +189,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -224,9 +220,9 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
+ void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -243,10 +239,9 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -283,26 +278,147 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
+/*
+ * Helper used to suspend/resume gfx pipe for image post process work to set
+ * barrier behaviour.
+ */
+static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+
+ /* stop secheduler and drain ring. */
+ if (suspend) {
+ drm_sched_stop(&ring->sched, NULL);
+ r = amdgpu_fence_wait_empty(ring);
+ if (r)
+ goto out;
+ } else {
+ drm_sched_start(&ring->sched, 0);
+ }
+ }
+
+out:
+ /* return on resume or failure to drain rings. */
+ if (!suspend || r)
+ return r;
+
+ return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
+}
+
+static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
+{
+ uint32_t data;
+
+ WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ amdgpu_amdkfd_suspend(adev, true);
+
+ if (suspend_resume_compute_scheduler(adev, true))
+ goto out;
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
+ data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !enable_waitcnt);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);
+
+out:
+ suspend_resume_compute_scheduler(adev, false);
+
+ amdgpu_amdkfd_resume(adev, true);
+
+ up_read(&adev->reset_domain->sem);
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, false);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
const struct kfd2kgd_calls arcturus_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
.init_interrupts = kgd_gfx_v9_init_interrupts,
.hqd_load = kgd_gfx_v9_hqd_load,
.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
.hqd_dump = kgd_gfx_v9_hqd_dump,
- .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_gfx_v9_address_watch_disable,
- .address_watch_execute = kgd_gfx_v9_address_watch_execute,
+ .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
- .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
kgd_gfx_v9_set_vm_context_page_table_base,
- .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy
+ .enable_debug_trap = kgd_arcturus_enable_debug_trap,
+ .disable_debug_trap = kgd_arcturus_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
new file mode 100644
index 000000000000..756c1a5679c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
+ void *mqd);
+int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 3107b9575929..1ef758ac5076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
static const struct dma_fence_ops amdkfd_fence_ops;
static atomic_t fence_seq = ATOMIC_INIT(0);
@@ -40,13 +41,13 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
* All the BOs in a process share an eviction fence. When process X wants
* to map VRAM memory but TTM can't find enough space, TTM will attempt to
* evict BOs from its LRU list. TTM checks if the BO is valuable to evict
- * by calling ttm_bo_driver->eviction_valuable().
+ * by calling ttm_device_funcs->eviction_valuable().
*
- * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
+ * ttm_device_funcs->eviction_valuable() - will return false if the BO belongs
* to process X. Otherwise, it will return true to indicate BO can be
* evicted by TTM.
*
- * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
+ * If ttm_device_funcs->eviction_valuable returns true, then TTM will continue
* the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
* --> amdgpu_copy_buffer(). This sets up job in GPU scheduler.
*
@@ -60,7 +61,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
*/
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
- struct mm_struct *mm)
+ struct mm_struct *mm,
+ struct svm_range_bo *svm_bo)
{
struct amdgpu_amdkfd_fence *fence;
@@ -73,7 +75,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
fence->mm = mm;
get_task_comm(fence->timeline_name, current);
spin_lock_init(&fence->lock);
-
+ fence->svm_bo = svm_bo;
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
context, atomic_inc_return(&fence_seq));
@@ -88,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
return NULL;
fence = container_of(f, struct amdgpu_amdkfd_fence, base);
- if (fence && f->ops == &amdkfd_fence_ops)
+ if (f->ops == &amdkfd_fence_ops)
return fence;
return NULL;
@@ -111,6 +113,8 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
* a KFD BO and schedules a job to move the BO.
* If fence is already signaled return true.
* If fence is not signaled schedule a evict KFD process work item.
+ *
+ * @f: dma_fence
*/
static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
{
@@ -122,16 +126,20 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
if (dma_fence_is_signaled(f))
return true;
- if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
- return true;
-
+ if (!fence->svm_bo) {
+ if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+ return true;
+ } else {
+ if (!svm_range_schedule_evict_svm_bo(fence))
+ return true;
+ }
return false;
}
/**
* amdkfd_fence_release - callback that fence can be freed
*
- * @fence: fence
+ * @f: dma_fence
*
* This function is called when the reference count becomes zero.
* Drops the mm_struct reference and RCU schedules freeing up the fence.
@@ -151,11 +159,14 @@ static void amdkfd_fence_release(struct dma_fence *f)
}
/**
- * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f
- * if same return TRUE else return FALSE.
+ * amdkfd_fence_check_mm - Check whether to prevent eviction of @f by @mm
*
* @f: [IN] fence
* @mm: [IN] mm that needs to be verified
+ *
+ * Check if @mm is same as that of the fence @f, if same return TRUE else
+ * return FALSE.
+ * For svm bo, which support vram overcommitment, always return FALSE.
*/
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
@@ -163,7 +174,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
if (!fence)
return false;
- else if (fence->mm == mm)
+ else if (fence->mm == mm && !fence->svm_bo)
return true;
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
new file mode 100644
index 000000000000..89a45a9218f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+#include "athub/athub_1_8_0_offset.h"
+#include "athub/athub_1_8_0_sh_mask.h"
+#include "oss/osssys_4_4_2_offset.h"
+#include "oss/osssys_4_4_2_sh_mask.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base =
+ SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id),
+ regSDMA_RLC0_RB_CNTL) -
+ regSDMA_RLC0_RB_CNTL;
+ uint32_t retval = sdma_engine_reg_base +
+ queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
+ return retval;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+12)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_IB_SUB_REMAIN;
+ reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_MIDCMD_DATA0;
+ reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+ temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) |
+ SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
+ u32 pasid, unsigned int vmid, uint32_t xcc_inst)
+{
+ unsigned long timeout;
+ unsigned int reg;
+ unsigned int phy_inst = GET_INST(GC, xcc_inst);
+ /* Every two XCCs share one AID */
+ unsigned int aid = phy_inst / 2;
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);
+
+ timeout = jiffies + msecs_to_jiffies(10);
+ while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid))) {
+ if (time_after(jiffies, timeout)) {
+ pr_err("Fail to program VMID-PASID mapping\n");
+ return -ETIME;
+ }
+ cpu_relax();
+ }
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX));
+ /* Every 4 numbers is a cycle. 1st is AID, 2nd and 3rd are XCDs,
+ * and the 4th is reserved. Therefore "aid * 4 + (xcc_inst % 2) + 1"
+ * programs _LUT for XCC and "aid * 4" for AID where the XCC connects
+ * to.
+ */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4 + (phy_inst % 2) + 1);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg);
+
+ return 0;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, hqd_end, data;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR);
+ hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
+
+ for (reg = hqd_base; reg <= hqd_end; reg++)
+ WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v9_4_3_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v9_4_3_validate_trap_override_request(
+ struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v9_4_3_set_wave_launch_trap_override(
+ struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v9_4_3_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high, inst);
+
+ WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low, inst);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+static uint32_t kgd_gfx_v9_4_3_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ uint32_t reg_offset = get_sdma_rlc_reg_offset(adev, engine, queue);
+ uint32_t status = RREG32(regSDMA_RLC0_CONTEXT_STATUS + reg_offset);
+ uint32_t doorbell_off = RREG32(regSDMA_RLC0_DOORBELL_OFFSET + reg_offset);
+ bool is_active = !!REG_GET_FIELD(status, SDMA_RLC0_CONTEXT_STATUS, SELECTED);
+
+ return is_active ? doorbell_off >> 2 : 0;
+}
+
+const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_4_3_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base =
+ kgd_gfx_v9_set_vm_context_page_table_base,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings =
+ kgd_gfx_v9_program_trap_handler_settings,
+ .build_dequeue_wait_counts_packet_info =
+ kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
+ .validate_trap_override_request =
+ kgd_gfx_v9_4_3_validate_trap_override_request,
+ .set_wave_launch_trap_override =
+ kgd_gfx_v9_4_3_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_4_3_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 62aa1a6f64ed..0239114fb6c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -21,6 +21,7 @@
*/
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "athub/athub_2_0_0_offset.h"
@@ -31,6 +32,7 @@
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
+#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -39,37 +41,26 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, mec, pipe, queue, vmid);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
static uint64_t get_queue_mask(struct amdgpu_device *adev,
@@ -81,33 +72,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
/* APE1 no longer exists on GFX9 */
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -150,22 +137,22 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
* but still works
*/
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -218,12 +205,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v10_sdma_mqd *)mqd;
}
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
@@ -231,7 +217,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
@@ -239,13 +225,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
- WREG32(reg, mqd_hqd[reg - hqd_base]);
+ WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -274,46 +260,45 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
lower_32_bits(guessed_wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
upper_32_bits(guessed_wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
lower_32_bits((uint64_t)wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
upper_32_bits((uint64_t)wptr));
pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+ WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_RPTR,
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -321,7 +306,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -348,37 +333,36 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
- release_queue(kgd);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
return r;
}
-static int kgd_hqd_dump(struct kgd_dev *kgd,
+static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
break; \
(*dump)[i][0] = (addr) << 2; \
- (*dump)[i++][1] = RREG32(addr); \
+ (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -386,10 +370,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -456,18 +439,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -488,31 +470,30 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
- act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
- if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
- high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -529,12 +510,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
@@ -548,7 +528,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
int retry;
#endif
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
@@ -560,6 +540,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
default:
type = DRAIN_PIPE;
break;
@@ -621,29 +604,28 @@ loop:
preempt_enable();
#endif
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
- temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -680,11 +662,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
@@ -693,31 +674,16 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- return 0;
-}
-
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
@@ -726,24 +692,15 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- return 0;
-}
-
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
vmid);
@@ -754,6 +711,385 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
+/*
+ * GFX10 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~3500 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because current GFX10 chips cannot support multi-process debugging due to
+ * trap configuration and masking being limited to global scope. Always
+ * assume single process conditions.
+ *
+ */
+
+#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110
+static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall)
+{
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ int i;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ /* assume gfx off is disabled for the debug session if rlc restore not supported. */
+ if (restore_dbg_registers) {
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, 1 << vmid);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the
+ * same values.
+ */
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t tcp_watch_address_cntl;
+ uint32_t sq_watch_address_cntl;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ tcp_watch_address_cntl = 0;
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ sq_watch_address_cntl = 0;
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ /* Program {TCP,SQ}_WATCH?_ADDR* */
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+#undef TCP_WATCH_STRIDE
+#undef SQ_WATCH_STRIDE
+
+
+/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+
+{
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
+static void program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+ lower_32_bits(tba_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+ upper_32_bits(tba_addr >> 8) |
+ (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+ lower_32_bits(tma_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+ upper_32_bits(tma_addr >> 8));
+
+ unlock_srbm(adev);
+}
+
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ return 0;
+}
+
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -767,11 +1103,21 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
+ .program_trap_handler_settings = program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
new file mode 100644
index 000000000000..a4c607c88178
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst);
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst);
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst);
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst,
+ unsigned int utimeout);
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index fad3b91f74f5..f2278a0937ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -22,10 +22,13 @@
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_3_0_offset.h"
#include "gc/gc_10_3_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
@@ -38,37 +41,26 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, mec, pipe, queue, vmid);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
static uint64_t get_queue_mask(struct amdgpu_device *adev,
@@ -80,34 +72,30 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void program_sh_mem_settings_v10_3(struct kgd_dev *kgd, uint32_t vmid,
+static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ lock_srbm(adev, 0, 0, 0, vmid);
- lock_srbm(kgd, 0, 0, 0, vmid);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
/* APE1 no longer exists on GFX9 */
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
/* ATC is defeatured on Sienna_Cichlid */
-static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid)
+static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
/* Mapping vmid to pasid also for IH block */
@@ -118,22 +106,22 @@ static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int init_interrupts_v10_3(struct kgd_dev *kgd, uint32_t pipe_id)
+static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -156,16 +144,16 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
break;
case 1:
- sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
break;
case 2:
- sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
- mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
break;
case 3:
- sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
- mmSDMA3_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
break;
}
@@ -188,12 +176,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v10_sdma_mqd *)mqd;
}
-static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
@@ -201,7 +188,7 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HIQ is set during driver init period with vmid set to 0*/
if (m->cp_hqd_vmid == 0) {
@@ -212,10 +199,10 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+ value = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, value);
}
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
@@ -224,13 +211,13 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
- WREG32(reg, mqd_hqd[reg - hqd_base]);
+ WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -259,17 +246,17 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
lower_32_bits(guessed_wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
upper_32_bits(guessed_wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
lower_32_bits((uint64_t)wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
upper_32_bits((uint64_t)wptr));
pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+ WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
@@ -279,26 +266,25 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd,
+static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -306,7 +292,7 @@ static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -333,37 +319,36 @@ static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
- release_queue(kgd);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
return r;
}
-static int hqd_dump_v10_3(struct kgd_dev *kgd,
+static int hqd_dump_v10_3(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
break; \
(*dump)[i][0] = (addr) << 2; \
- (*dump)[i++][1] = RREG32(addr); \
+ (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -371,10 +356,9 @@ static int hqd_dump_v10_3(struct kgd_dev *kgd,
return 0;
}
-static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd,
+static int hqd_sdma_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -441,18 +425,17 @@ static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd,
+static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
-#define HQD_N_REGS (19+6+7+10)
+#define HQD_N_REGS (19+6+7+12)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -473,31 +456,31 @@ static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd,
return 0;
}
-static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
- act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
- if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
- high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd)
+static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
+ void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -514,18 +497,17 @@ static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
+static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
struct v10_compute_mqd *m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
@@ -537,35 +519,37 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
default:
type = DRAIN_PIPE;
break;
}
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
- temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue pipe %d queue %d preemption failed\n",
pipe_id, queue_id);
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
+static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -602,31 +586,15 @@ static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
return 0;
}
-
-static int address_watch_disable_v10_3(struct kgd_dev *kgd)
-{
- return 0;
-}
-
-static int address_watch_execute_v10_3(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- return 0;
-}
-
-static int wave_control_execute_v10_3(struct kgd_dev *kgd,
+static int wave_control_execute_v10_3(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
@@ -636,170 +604,56 @@ static int wave_control_execute_v10_3(struct kgd_dev *kgd,
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-static uint32_t address_watch_get_offset_v10_3(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
-{
- return 0;
-}
-
-static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- /* SDMA is on gfxhub as well for Navi1* series */
- adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
-}
-
-#if 0
-uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
- uint32_t trap_debug_wave_launch_mode,
- uint32_t vmid)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t data = 0;
- uint32_t orig_wave_cntl_value;
- uint32_t orig_stall_vmid;
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
- 0,
- mmSPI_GDBG_WAVE_CNTL));
- orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
- SPI_GDBG_WAVE_CNTL,
- STALL_VMID);
-
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- data = 0;
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);
-
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
-uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd)
+static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t value;
- mutex_lock(&adev->grbm_idx_mutex);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd,
- uint32_t trap_override,
- uint32_t trap_mask)
+static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t data = 0;
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- data = 0;
- data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
- EXCP_EN, trap_mask);
- data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
- REPLACE, trap_override);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd,
- uint8_t wave_launch_mode,
- uint32_t vmid)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t data = 0;
- bool is_stall_mode;
- bool is_mode_set;
-
- is_stall_mode = (wave_launch_mode == 4);
- is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
- VMID_MASK, is_mode_set ? 1 << vmid : 0);
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
- MODE, is_mode_set ? wave_launch_mode : 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
- STALL_VMID, is_stall_mode ? 1 << vmid : 0);
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
- STALL_RA, is_stall_mode ? 1 : 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
+ /* SDMA is on gfxhub as well for Navi1* series */
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
-/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
- * The values read are:
- * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
- * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
- * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
- * gws_wait_time -- Wait Count for Global Wave Syncs.
- * que_sleep_wait_time -- Wait Count for Dequeue Retry.
- * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
- * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
- * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
- */
-void get_iq_wait_times_v10_3(struct kgd_dev *kgd,
- uint32_t *wait_times)
-
+static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ lock_srbm(adev, 0, 0, 0, vmid);
- *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
-}
-
-void build_grace_period_packet_info_v10_3(struct kgd_dev *kgd,
- uint32_t wait_times,
- uint32_t grace_period,
- uint32_t *reg_offset,
- uint32_t *reg_data)
-{
- *reg_data = wait_times;
+ /*
+ * Program TBA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+ lower_32_bits(tba_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+ upper_32_bits(tba_addr >> 8) |
+ (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
+ /*
+ * Program TMA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+ lower_32_bits(tma_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+ upper_32_bits(tma_addr >> 8));
- *reg_offset = mmCP_IQ_WAIT_TIME2;
+ unlock_srbm(adev);
}
-#endif
const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v10_3,
@@ -814,18 +668,20 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3,
.hqd_destroy = hqd_destroy_v10_3,
.hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
- .address_watch_disable = address_watch_disable_v10_3,
- .address_watch_execute = address_watch_execute_v10_3,
.wave_control_execute = wave_control_execute_v10_3,
- .address_watch_get_offset = address_watch_get_offset_v10_3,
- .get_atc_vmid_pasid_mapping_info = NULL,
+ .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
-#if 0
- .enable_debug_trap = enable_debug_trap_v10_3,
- .disable_debug_trap = disable_debug_trap_v10_3,
- .set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3,
- .set_wave_launch_mode = set_wave_launch_mode_v10_3,
- .get_iq_wait_times = get_iq_wait_times_v10_3,
- .build_grace_period_packet_info = build_grace_period_packet_info_v10_3,
-#endif
+ .program_trap_handler_settings = program_trap_handler_settings_v10_3,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
new file mode 100644
index 000000000000..aaccf0b9947d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/mmu_context.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "oss/osssys_6_0_0_offset.h"
+#include "oss/osssys_6_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "soc15d.h"
+#include "v11_structs.h"
+#include "soc21.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);
+
+ unlock_srbm(adev);
+}
+
+static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
+ vmid, pasid);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value);
+
+ return 0;
+}
+
+static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ default:
+ BUG();
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v11_compute_mqd *)mqd;
+}
+
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v11_sdma_mqd *)mqd;
+}
+
+static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm, uint32_t inst)
+{
+ struct v11_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ struct v11_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
+
+ return r;
+}
+
+static int hqd_dump_v11(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+ if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (7+11+1+12+12)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA0_QUEUE0_RB_CNTL;
+ reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
+ reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
+ reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
+ reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
+ reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v11_compute_mqd *m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue pipe %d queue %d preemption failed\n",
+ pipe_id, queue_id);
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
+ temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+ if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
+ SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int wave_control_execute_v11(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* SDMA is on gfxhub as well for gfx11 adapters */
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_gfx_v11_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 4))
+ *trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ return 0;
+}
+
+static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ return 0;
+}
+
+static uint32_t kgd_gfx_v11_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
+ .program_sh_mem_settings = program_sh_mem_settings_v11,
+ .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
+ .init_interrupts = init_interrupts_v11,
+ .hqd_load = hqd_load_v11,
+ .hiq_mqd_load = hiq_mqd_load_v11,
+ .hqd_sdma_load = hqd_sdma_load_v11,
+ .hqd_dump = hqd_dump_v11,
+ .hqd_sdma_dump = hqd_sdma_dump_v11,
+ .hqd_is_occupied = hqd_is_occupied_v11,
+ .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
+ .hqd_destroy = hqd_destroy_v11,
+ .hqd_sdma_destroy = hqd_sdma_destroy_v11,
+ .wave_control_execute = wave_control_execute_v11,
+ .get_atc_vmid_pasid_mapping_info = NULL,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
+ .enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v11_set_address_watch,
+ .clear_address_watch = kgd_gfx_v11_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v11_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v11_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
new file mode 100644
index 000000000000..e0ceab400b2d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static int init_interrupts_v12(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ default:
+ BUG();
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static int hqd_dump_v12(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int hqd_sdma_dump_v12(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+
+ const uint32_t first_reg = regSDMA0_QUEUE0_RB_CNTL;
+ const uint32_t last_reg = regSDMA0_QUEUE0_CONTEXT_STATUS;
+#undef HQD_N_REGS
+#define HQD_N_REGS (last_reg - first_reg + 1)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = first_reg;
+ reg <= last_reg; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int wave_control_execute_v12(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v12_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+/* returns STALL_VMID or LAUNCH_MODE. */
+static uint32_t kgd_gfx_v12_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_stall_mode = wave_launch_mode == 4;
+
+ if (is_stall_mode)
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, STALL_VMID,
+ 1);
+ else
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE,
+ wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v12_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v12_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+static uint32_t kgd_gfx_v12_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v12_kfd2kgd = {
+ .init_interrupts = init_interrupts_v12,
+ .hqd_dump = hqd_dump_v12,
+ .hqd_sdma_dump = hqd_sdma_dump_v12,
+ .wave_control_execute = wave_control_execute_v12,
+ .get_atc_vmid_pasid_mapping_info = NULL,
+ .enable_debug_trap = kgd_gfx_v12_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v12_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v12_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v12_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v12_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v12_set_address_watch,
+ .clear_address_watch = kgd_gfx_v12_clear_address_watch,
+ .hqd_sdma_get_doorbell = kgd_gfx_v12_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index b91d27e39bad..df77558e03ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -45,105 +45,54 @@ enum {
MAX_WATCH_ADDRESSES = 4
};
-enum {
- ADDRESS_WATCH_REG_ADDR_HI = 0,
- ADDRESS_WATCH_REG_ADDR_LO,
- ADDRESS_WATCH_REG_CNTL,
- ADDRESS_WATCH_REG_MAX
-};
-
-/* not defined in the CI/KV reg file */
-enum {
- ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
- ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
- /* extend the mask to 26 bits to match the low address field */
- ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
- ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
-};
-
-static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
- mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
- mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
- mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
- mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
-};
-
-union TCP_WATCH_CNTL_BITS {
- struct {
- uint32_t mask:24;
- uint32_t vmid:4;
- uint32_t atc:1;
- uint32_t mode:2;
- uint32_t valid:1;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
mutex_lock(&adev->srbm_mutex);
WREG32(mmSRBM_GFX_CNTL, value);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
WREG32(mmSRBM_GFX_CNTL, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32(mmSH_MEM_CONFIG, sh_mem_config);
WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
WREG32(mmSH_MEM_BASES, sh_mem_bases);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -165,21 +114,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
return 0;
}
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -207,12 +156,11 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
return (struct cik_sdma_rlc_registers *)mqd;
}
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, wptr_val, data;
@@ -220,7 +168,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
@@ -237,27 +185,26 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
/* read_user_ptr may take the mm->mmap_lock.
* release srbm_mutex to avoid circular dependency between
- * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
+ * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
*/
- release_queue(kgd);
+ release_queue(adev);
valid_wptr = read_user_wptr(mm, wptr, wptr_val);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (valid_wptr)
WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_dump(struct kgd_dev *kgd,
+static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
#define DUMP_REG(addr) do { \
@@ -267,11 +214,11 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
@@ -281,7 +228,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -289,10 +236,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
unsigned long end_jiffies;
uint32_t sdma_rlc_reg_offset;
@@ -345,18 +291,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -372,15 +317,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
act = RREG32(mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
@@ -390,13 +335,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
high == RREG32(mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -412,12 +356,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t temp;
enum hqd_dequeue_request_type type;
unsigned long flags, end_jiffies;
@@ -426,7 +369,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
if (amdgpu_in_reset(adev))
return -EIO;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
switch (reset_type) {
@@ -504,20 +447,19 @@ loop:
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out\n");
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -551,62 +493,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- union TCP_WATCH_CNTL_BITS cntl;
- unsigned int i;
-
- cntl.u32All = 0;
-
- cntl.bitfields.valid = 0;
- cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
- cntl.bitfields.atc = 1;
-
- /* Turning off this address until we set all the registers */
- for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
- WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- union TCP_WATCH_CNTL_BITS cntl;
-
- cntl.u32All = cntl_val;
-
- /* Turning off this watch point until we set all the registers */
- cntl.bitfields.valid = 0;
- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-
- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_ADDR_HI], addr_hi);
-
- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_ADDR_LO], addr_lo);
-
- /* Enable the watch point */
- cntl.bitfields.valid = 1;
-
- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-
- return 0;
-}
-
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data;
mutex_lock(&adev->grbm_idx_mutex);
@@ -627,18 +517,10 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
return 0;
}
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
-{
- return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
-}
-
-static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
@@ -646,21 +528,17 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static void set_scratch_backing_va(struct kgd_dev *kgd,
+static void set_scratch_backing_va(struct amdgpu_device *adev,
uint64_t va, uint32_t vmid)
{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
@@ -676,15 +554,20 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
* @vmid: vmid pointer
* read vmid from register (CIK).
*/
-static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
+static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -697,12 +580,10 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
.set_scratch_backing_va = set_scratch_backing_va,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 5ce0ce704a21..e68c0fa8d751 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,68 +39,54 @@ enum hqd_dequeue_request_type {
RESET_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
mutex_lock(&adev->srbm_mutex);
WREG32(mmSRBM_GFX_CNTL, value);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
WREG32(mmSRBM_GFX_CNTL, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32(mmSH_MEM_CONFIG, sh_mem_config);
WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
WREG32(mmSH_MEM_BASES, sh_mem_bases);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -123,21 +109,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
return 0;
}
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -165,12 +151,11 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct vi_sdma_mqd *)mqd;
}
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, wptr_val, data;
@@ -178,7 +163,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HIQ is set during driver init period with vmid set to 0*/
if (m->cp_hqd_vmid == 0) {
@@ -206,7 +191,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
* on ASICs that do not support context-save.
* EOP writes/reads can start anywhere in the ring.
*/
- if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+ if (adev->asic_type != CHIP_TONGA) {
WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
@@ -224,27 +209,26 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
/* read_user_ptr may take the mm->mmap_lock.
* release srbm_mutex to avoid circular dependency between
- * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
+ * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
*/
- release_queue(kgd);
+ release_queue(adev);
valid_wptr = read_user_wptr(mm, wptr, wptr_val);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (valid_wptr)
WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_dump(struct kgd_dev *kgd,
+static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do { \
@@ -254,11 +238,11 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
@@ -268,7 +252,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -276,10 +260,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
unsigned long end_jiffies;
uint32_t sdma_rlc_reg_offset;
@@ -331,18 +314,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -367,15 +349,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
act = RREG32(mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
@@ -385,13 +367,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
high == RREG32(mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -407,12 +388,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t temp;
enum hqd_dequeue_request_type type;
unsigned long flags, end_jiffies;
@@ -422,7 +402,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
if (amdgpu_in_reset(adev))
return -EIO;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
@@ -502,20 +482,19 @@ loop:
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -549,11 +528,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
@@ -561,25 +539,10 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- return 0;
-}
-
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
@@ -600,28 +563,17 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
return 0;
}
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
-{
- return 0;
-}
-
-static void set_scratch_backing_va(struct kgd_dev *kgd,
+static void set_scratch_backing_va(struct amdgpu_device *adev,
uint64_t va, uint32_t vmid)
{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
@@ -630,6 +582,13 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
lower_32_bits(page_table_base));
}
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -642,12 +601,10 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
.set_scratch_backing_va = set_scratch_backing_va,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index b43e68fc1378..088d09cc7a72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -38,47 +38,38 @@
#include "soc15d.h"
#include "gfx_v9_0.h"
#include "amdgpu_amdkfd_gfx_v9.h"
+#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
DRAIN_PIPE,
- RESET_WAVES
+ RESET_WAVES,
+ SAVE_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid, uint32_t inst)
{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
- uint32_t queue, uint32_t vmid)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, mec, pipe, queue, vmid);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid, GET_INST(GC, inst));
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t queue_id)
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0, inst);
}
-static uint64_t get_queue_mask(struct amdgpu_device *adev,
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
@@ -87,33 +78,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct kgd_dev *kgd)
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst)
{
- unlock_srbm(kgd);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
}
-void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
- lock_srbm(kgd, 0, 0, 0, vmid);
-
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases);
/* APE1 no longer exists on GFX9 */
- unlock_srbm(kgd);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
}
-int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -170,22 +157,22 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
* but still works
*/
-int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0, inst);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
return 0;
}
@@ -232,33 +219,33 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v9_sdma_mqd *)mqd;
}
-int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
- hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
for (reg = hqd_base;
- reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
- WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -287,44 +274,42 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
- lower_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
- upper_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
- lower_32_bits((uintptr_t)wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
- upper_32_bits((uintptr_t)wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
- (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
- REG_SET_FIELD(m->cp_hqd_eop_rptr,
- CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
-int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring;
struct v9_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -332,7 +317,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -359,17 +344,16 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
- release_queue(kgd);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ kgd_gfx_v9_release_queue(adev, inst);
return r;
}
-int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
+int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -379,17 +363,17 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
- for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
- reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -397,10 +381,9 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -467,18 +450,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -499,31 +481,30 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
- act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ act = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
- if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
- high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ if (low == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -540,12 +521,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
@@ -554,10 +534,10 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
if (amdgpu_in_reset(adev))
return -EIO;
- acquire_queue(kgd, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
if (m->cp_hqd_vmid == 0)
- WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+ WREG32_FIELD15_RLC(GC, GET_INST(GC, inst), RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -566,34 +546,36 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
default:
type = DRAIN_PIPE;
break;
}
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
- temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -630,11 +612,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
@@ -643,57 +624,295 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
+int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * GFX9 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~96 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because GFX9.4.1 cannot support multi-process debugging due to trap
+ * configuration and masking being limited to global scope. Always assume
+ * single process conditions.
+ */
+#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY 3
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall)
+{
+ int i;
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+ else
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
+ stall ? 1 : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
return 0;
}
-int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
return 0;
}
-int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd)
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
mutex_lock(&adev->grbm_idx_mutex);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
- INSTANCE_BROADCAST_WRITES, 1);
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
- SH_BROADCAST_WRITES, 1);
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
- SE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
-uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
return 0;
}
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
- uint32_t vmid, uint64_t page_table_base)
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+
+{
+ *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst),
+ mmCP_IQ_WAIT_TIME2);
+}
+
+void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
vmid);
@@ -719,57 +938,58 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
}
/**
- * @get_wave_count: Read device registers to get number of waves in flight for
+ * get_wave_count: Read device registers to get number of waves in flight for
* a particular queue. The method also returns the VMID associated with the
* queue.
*
* @adev: Handle of device whose registers are to be read
* @queue_idx: Index of queue in the queue-map bit-field
- * @wave_cnt: Output parameter updated with number of waves in flight
- * @vmid: Output parameter updated with VMID of queue whose wave count
- * is being collected
+ * @queue_cnt: Stores the wave count and doorbell offset for an active queue
+ * @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
- int *wave_cnt, int *vmid)
+ struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
{
int pipe_idx;
int queue_slot;
unsigned int reg_val;
-
+ unsigned int wave_cnt;
/*
* Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
* parameters to read out waves in flight. Get VMID if there are
* non-zero waves in flight.
*/
- *vmid = 0xFF;
- *wave_cnt = 0;
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
- soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
- reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
- queue_slot);
- *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
- if (*wave_cnt != 0)
- *vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
- CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
+ soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
+ reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
+ wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
+ if (wave_cnt != 0) {
+ queue_cnt->wave_cnt += wave_cnt;
+ queue_cnt->doorbell_off =
+ (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ }
}
/**
- * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
+ * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
* shader engine and aggregates the number of waves that are in flight for the
* process whose pasid is provided as a parameter. The process could have ZERO
* or more queues running and submitting waves to compute units.
*
- * @kgd: Handle of device from which to get number of waves in flight
- * @pasid: Identifies the process for which this query call is invoked
- * @wave_cnt: Output parameter updated with number of waves in flight that
- * belong to process with given pasid
+ * @adev: Handle of device from which to get number of waves in flight
+ * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
+ * for comparison later.
* @max_waves_per_cu: Output parameter updated with maximum number of waves
- * possible per Compute Unit
+ * possible per Compute Unit
+ * @inst: xcc's instance number on a multi-XCC setup
*
- * @note: It's possible that the device has too many queues (oversubscription)
+ * Note: It's possible that the device has too many queues (oversubscription)
* in which case a VMID could be remapped to a different PASID. This could lead
- * to an iaccurate wave count. Following is a high-level sequence:
+ * to an inaccurate wave count. Following is a high-level sequence:
* Time T1: vmid = getVmid(); vmid is associated with Pasid P1
* Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2
* In the sequence above wave count obtained from time T1 will be incorrectly
@@ -790,94 +1010,226 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* number of waves that are in flight for the queue at specified index. The
* index ranges from 0 to 7.
*
- * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
- * of the wave(s).
+ * If non-zero waves are in flight, store the corresponding doorbell offset
+ * of the queue, along with the wave count.
*
- * Determine if VMID from above step maps to pasid provided as parameter. If
- * it matches agrregate the wave count. That the VMID will not match pasid is
- * a normal condition i.e. a device is expected to support multiple queues
- * from multiple proceses.
+ * Determine if the queue belongs to the process by comparing the doorbell
+ * offset against the process's queues. If it matches, aggregate the wave
+ * count for the process.
*
* Reading registers referenced above involves programming GRBM appropriately
*/
-void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu)
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst)
{
int qidx;
- int vmid;
int se_idx;
- int sh_idx;
int se_cnt;
- int sh_cnt;
- int wave_cnt;
int queue_map;
- int pasid_tmp;
int max_queue_cnt;
- int vmid_wave_cnt = 0;
- struct amdgpu_device *adev;
- DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
+ DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
- adev = get_amdgpu_device(kgd);
lock_spi_csq_mutexes(adev);
- soc15_grbm_select(adev, 1, 0, 0, 0);
+ soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
/*
* Iterate through the shader engines and arrays of the device
* to get number of waves in flight
*/
- bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
- KGD_MAX_QUEUES);
+ bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
+ AMDGPU_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
- sh_cnt = adev->gfx.config.max_sh_per_se;
se_cnt = adev->gfx.config.max_shader_engines;
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
- for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
+ amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
+ queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
+
+ /*
+ * Assumption: queue map encodes following schema: four
+ * pipes per each micro-engine, with each pipe mapping
+ * eight queues. This schema is true for GFX9 devices
+ * and must be verified for newer device families
+ */
+ for (qidx = 0; qidx < max_queue_cnt; qidx++) {
+ /* Skip qeueus that are not associated with
+ * compute functions
+ */
+ if (!test_bit(qidx, cp_queue_bitmap))
+ continue;
- gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
- queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
- mmSPI_CSQ_WF_ACTIVE_STATUS));
+ if (!(queue_map & (1 << qidx)))
+ continue;
- /*
- * Assumption: queue map encodes following schema: four
- * pipes per each micro-engine, with each pipe mapping
- * eight queues. This schema is true for GFX9 devices
- * and must be verified for newer device families
- */
- for (qidx = 0; qidx < max_queue_cnt; qidx++) {
-
- /* Skip qeueus that are not associated with
- * compute functions
- */
- if (!test_bit(qidx, cp_queue_bitmap))
- continue;
-
- if (!(queue_map & (1 << qidx)))
- continue;
-
- /* Get number of waves in flight and aggregate them */
- get_wave_count(adev, qidx, &wave_cnt, &vmid);
- if (wave_cnt != 0) {
- pasid_tmp =
- RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
- mmIH_VMID_0_LUT) + vmid);
- if (pasid_tmp == pasid)
- vmid_wave_cnt += wave_cnt;
- }
- }
+ /* Get number of waves in flight and aggregate them */
+ get_wave_count(adev, qidx, &cu_occupancy[qidx],
+ inst);
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
unlock_spi_csq_mutexes(adev);
/* Update the output parameters and return */
- *pasid_wave_cnt = vmid_wave_cnt;
*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
adev->gfx.cu_info.max_waves_per_simd;
}
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
+void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
+{
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO,
+ lower_32_bits(tba_addr >> 8));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI,
+ upper_32_bits(tba_addr >> 8));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_LO,
+ lower_32_bits(tma_addr >> 8));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_HI,
+ upper_32_bits(tma_addr >> 8));
+
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+}
+
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ uint32_t low, high;
+ uint64_t queue_addr = 0;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+ if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+ goto unlock_out;
+
+ low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+ high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+ /* only concerned with user queues. */
+ if (!high)
+ goto unlock_out;
+
+ queue_addr = (((queue_addr | high) << 32) | low) << 8;
+
+unlock_out:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return queue_addr;
+}
+
+/* assume queue acquired */
+static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst,
+ unsigned int utimeout)
+{
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ while (true) {
+ uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ return 0;
+
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+
+ usleep_range(500, 1000);
+ }
+}
+
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ uint32_t low, high, pipe_reset_data = 0;
+ uint64_t queue_addr = 0;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+ if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+ goto unlock_out;
+
+ low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+ high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+ /* only concerned with user queues. */
+ if (!high)
+ goto unlock_out;
+
+ queue_addr = (((queue_addr | high) << 32) | low) << 8;
+
+ pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n",
+ inst, pipe_id, queue_id);
+
+ /* assume previous dequeue request issued will take affect after reset */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
+ goto unlock_out;
+
+ pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id);
+
+ pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1);
+ pipe_reset_data = pipe_reset_data << pipe_id;
+
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0);
+
+ if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
+ queue_addr = 0;
+
+unlock_out:
+ pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
+ inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!");
+ amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return queue_addr;
+}
+
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -891,12 +1243,22 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_gfx_v9_address_watch_disable,
- .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
- .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index e64deba8900f..704452ca62f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -20,48 +20,97 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-
-void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases);
-int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid);
-int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t sh_mem_bases, uint32_t inst);
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst);
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst);
+int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ struct mm_struct *mm, uint32_t inst);
+int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off);
-int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
+ uint32_t doorbell_off, uint32_t inst);
+int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
+bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd);
-int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t queue_id, uint32_t inst);
+int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd);
-uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint32_t sq_cmd, uint32_t inst);
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
-
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
+void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
-void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu);
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst);
+void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst);
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id);
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst);
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall);
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst);
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst);
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst);
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst,
+ unsigned int utimeout);
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ac0a432a9bf7..b1c24c8fa686 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014-2018 Advanced Micro Devices, Inc.
*
@@ -24,21 +25,32 @@
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include <drm/drm_exec.h>
#include "amdgpu_object.h"
#include "amdgpu_gem.h"
#include "amdgpu_vm.h"
+#include "amdgpu_hmm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
-
-/* BO flag to indicate a KFD userptr BO */
-#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+#include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
+#include "kfd_smi_events.h"
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29)
+
+/*
+ * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_AVAILABLITY_ALIGN (1 << 21)
/* Impose limit on how much memory KFD can use */
static struct {
@@ -49,12 +61,6 @@ static struct {
spinlock_t mem_limit_lock;
} kfd_mem_limit;
-/* Struct used for amdgpu_amdkfd_bo_validate */
-struct amdgpu_vm_parser {
- uint32_t domain;
- bool wait;
-};
-
static const char * const domain_bit_to_string[] = {
"CPU",
"GTT",
@@ -68,22 +74,35 @@ static const char * const domain_bit_to_string[] = {
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
-
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
+static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
struct kgd_mem *mem)
{
- struct kfd_bo_va_list *entry;
+ struct kfd_mem_attachment *entry;
- list_for_each_entry(entry, &mem->bo_va_list, bo_list)
+ list_for_each_entry(entry, &mem->attachments, list)
if (entry->bo_va->base.vm == avm)
- return false;
+ return true;
+
+ return false;
+}
- return true;
+/**
+ * reuse_dmamap() - Check whether adev can share the original
+ * userptr BO
+ *
+ * If both adev and bo_adev are in direct mapping or
+ * in the same iommu group, they can share the original BO.
+ *
+ * @adev: Device to which can or cannot share the original BO
+ * @bo_adev: Device to which allocated BO belongs to
+ *
+ * Return: returns true if adev can share original userptr BO,
+ * false otherwise.
+ */
+static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
+{
+ return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) ||
+ (adev->dev->iommu_group == bo_adev->dev->iommu_group);
}
/* Set memory usage limits. Current, limits are
@@ -95,18 +114,31 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
struct sysinfo si;
uint64_t mem;
+ if (kfd_mem_limit.max_system_mem_limit)
+ return;
+
si_meminfo(&si);
mem = si.totalram - si.totalhigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
- kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
- kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6);
+ if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT)
+ kfd_mem_limit.max_system_mem_limit >>= 1;
+ else
+ kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT;
+
+ kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
+{
+ kfd_mem_limit.system_mem_used += size;
+}
+
/* Estimate page table size needed to represent a given memory size
*
* With 4KB pages, we need one 8 byte PTE for each 4KB of memory
@@ -117,105 +149,215 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
* compromise that should work in most cases without reserving too
* much memory for page tables unnecessarily (factor 16K, >> 14).
*/
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
-static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 domain, bool sg)
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
+
+/**
+ * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer.
+ *
+ * @adev: Device to which allocated BO belongs to
+ * @size: Size of buffer, in bytes, encapsulated by B0. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ * @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is
+ * managed as one compute node in driver for app
+ *
+ * Return:
+ * returns -ENOMEM in case of error, ZERO otherwise
+ */
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
- size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
+ size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
+ uint64_t vram_size = 0;
- acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
- sizeof(struct amdgpu_bo));
-
+ system_mem_needed = 0;
+ ttm_mem_needed = 0;
vram_needed = 0;
- if (domain == AMDGPU_GEM_DOMAIN_GTT) {
- /* TTM GTT memory */
- system_mem_needed = acc_size + size;
- ttm_mem_needed = acc_size + size;
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
- /* Userptr */
- system_mem_needed = acc_size + size;
- ttm_mem_needed = acc_size;
- } else {
- /* VRAM and SG */
- system_mem_needed = acc_size;
- ttm_mem_needed = acc_size;
- if (domain == AMDGPU_GEM_DOMAIN_VRAM)
- vram_needed = size;
+ if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ /*
+ * Conservatively round up the allocation requirement to 2 MB
+ * to avoid fragmentation caused by 4K allocations in the tail
+ * 2M BO chunk.
+ */
+ vram_needed = size;
+ /*
+ * For GFX 9.4.3, get the VRAM size from XCP structs
+ */
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ return -EINVAL;
+
+ vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
+ if (adev->apu_prefer_gtt) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ }
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ system_mem_needed = size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+ return -ENOMEM;
}
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit)
+ kfd_mem_limit.max_system_mem_limit) {
pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+ if (!no_system_mem_limit) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
- if ((kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
- (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
- kfd_mem_limit.max_ttm_mem_limit) ||
- (adev->kfd.vram_used + vram_needed >
- adev->gmc.real_vram_size - reserved_for_pt)) {
+ if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+ kfd_mem_limit.max_ttm_mem_limit) {
ret = -ENOMEM;
- } else {
- kfd_mem_limit.system_mem_used += system_mem_needed;
- kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
- adev->kfd.vram_used += vram_needed;
+ goto release;
}
+ /*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with
+ * carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip
+ * VRAM check since ttm_mem_limit check already cover this allocation
+ */
+
+ if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
+ uint64_t vram_available =
+ vram_size - reserved_for_pt - reserved_for_ras -
+ atomic64_read(&adev->vram_pin_size);
+ if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
+
+ /* Update memory accounting by decreasing available system
+ * memory, TTM memory and GPU memory as computed above
+ */
+ WARN_ONCE(vram_needed && !adev,
+ "adev reference can't be null when vram is used");
+ if (adev && xcp_id >= 0) {
+ adev->kfd.vram_used[xcp_id] += vram_needed;
+ adev->kfd.vram_used_aligned[xcp_id] +=
+ adev->apu_prefer_gtt ?
+ vram_needed :
+ ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+ }
+ kfd_mem_limit.system_mem_used += system_mem_needed;
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
return ret;
}
-static void unreserve_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 domain, bool sg)
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
{
- size_t acc_size;
-
- acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
- sizeof(struct amdgpu_bo));
-
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (domain == AMDGPU_GEM_DOMAIN_GTT) {
- kfd_mem_limit.system_mem_used -= (acc_size + size);
- kfd_mem_limit.ttm_mem_used -= (acc_size + size);
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
- kfd_mem_limit.system_mem_used -= (acc_size + size);
- kfd_mem_limit.ttm_mem_used -= acc_size;
- } else {
- kfd_mem_limit.system_mem_used -= acc_size;
- kfd_mem_limit.ttm_mem_used -= acc_size;
- if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
- adev->kfd.vram_used -= size;
- WARN_ONCE(adev->kfd.vram_used < 0,
- "kfd VRAM memory accounting unbalanced");
+
+ if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ WARN_ONCE(!adev,
+ "adev reference can't be null when alloc mem flags vram is set");
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ goto release;
+
+ if (adev) {
+ adev->kfd.vram_used[xcp_id] -= size;
+ if (adev->apu_prefer_gtt) {
+ adev->kfd.vram_used_aligned[xcp_id] -= size;
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else {
+ adev->kfd.vram_used_aligned[xcp_id] -=
+ ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ }
}
- }
- WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
- "kfd system memory accounting unbalanced");
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ kfd_mem_limit.system_mem_used -= size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+ goto release;
+ }
+ WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
+ "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
- "kfd TTM memory accounting unbalanced");
+ "KFD TTM memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "KFD system memory accounting unbalanced");
+release:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- u32 domain = bo->preferred_domains;
- bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
+ u32 alloc_flags = bo->kfd_bo->alloc_flags;
+ u64 size = amdgpu_bo_size(bo);
- if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
- domain = AMDGPU_GEM_DOMAIN_CPU;
- sg = false;
- }
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
+ bo->xcp_id);
- unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+ kfree(bo->kfd_bo);
}
+/**
+ * create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information
+ * about USERPTR or DOOREBELL or MMIO BO.
+ *
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ * in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+ struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+ struct drm_gem_object *gem_obj;
+ int ret;
+ uint64_t flags = 0;
+
+ ret = amdgpu_bo_reserve(mem->bo, false);
+ if (ret)
+ return ret;
+
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
+ flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED);
+
+ ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
+ AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
+ ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
+
+ amdgpu_bo_unreserve(mem->bo);
+
+ if (ret) {
+ pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
+ return -EINVAL;
+ }
+
+ *bo_out = gem_to_amdgpu_bo(gem_obj);
+ (*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+ return ret;
+}
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
* reservation object.
@@ -229,90 +371,47 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence *ef)
{
- struct dma_resv *resv = bo->tbo.base.resv;
- struct dma_resv_list *old, *new;
- unsigned int i, j, k;
+ struct dma_fence *replacement;
if (!ef)
return -EINVAL;
- old = dma_resv_get_list(resv);
- if (!old)
- return 0;
-
- new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
- if (!new)
- return -ENOMEM;
-
- /* Go through all the shared fences in the resevation object and sort
- * the interesting ones to the end of the list.
+ /* TODO: Instead of block before we should use the fence of the page
+ * table update and TLB flush here directly.
*/
- for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
- struct dma_fence *f;
-
- f = rcu_dereference_protected(old->shared[i],
- dma_resv_held(resv));
-
- if (f->context == ef->base.context)
- RCU_INIT_POINTER(new->shared[--j], f);
- else
- RCU_INIT_POINTER(new->shared[k++], f);
- }
- new->shared_max = old->shared_max;
- new->shared_count = k;
-
- /* Install the new fence list, seqcount provides the barriers */
- write_seqcount_begin(&resv->seq);
- RCU_INIT_POINTER(resv->fence, new);
- write_seqcount_end(&resv->seq);
-
- /* Drop the references to the removed fences or move them to ef_list */
- for (i = j, k = 0; i < old->shared_count; ++i) {
- struct dma_fence *f;
-
- f = rcu_dereference_protected(new->shared[i],
- dma_resv_held(resv));
- dma_fence_put(f);
- }
- kfree_rcu(old, rcu);
-
+ replacement = dma_fence_get_stub();
+ dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
+ replacement, DMA_RESV_USAGE_BOOKKEEP);
+ dma_fence_put(replacement);
return 0;
}
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+/**
+ * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences
+ * @bo: the BO where to remove the evictions fences from.
+ *
+ * This functions should only be used on release when all references to the BO
+ * are already dropped. We remove the eviction fence from the private copy of
+ * the dma_resv object here since that is what is used during release to
+ * determine of the BO is idle or not.
+ */
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- struct amdgpu_bo *root = bo;
- struct amdgpu_vm_bo_base *vm_bo;
- struct amdgpu_vm *vm;
- struct amdkfd_process_info *info;
- struct amdgpu_amdkfd_fence *ef;
- int ret;
-
- /* we can always get vm_bo from root PD bo.*/
- while (root->parent)
- root = root->parent;
-
- vm_bo = root->vm_bo;
- if (!vm_bo)
- return 0;
-
- vm = vm_bo->vm;
- if (!vm)
- return 0;
+ struct dma_resv *resv = &bo->tbo.base._resv;
+ struct dma_fence *fence, *stub;
+ struct dma_resv_iter cursor;
- info = vm->process_info;
- if (!info || !info->eviction_fence)
- return 0;
-
- ef = container_of(dma_fence_get(&info->eviction_fence->base),
- struct amdgpu_amdkfd_fence, base);
+ dma_resv_assert_held(resv);
- BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
- dma_resv_unlock(bo->tbo.base.resv);
+ stub = dma_fence_get_stub();
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ if (!to_amdgpu_amdkfd_fence(fence))
+ continue;
- dma_fence_put(&ef->base);
- return ret;
+ dma_resv_replace_fences(resv, fence->context, stub,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ dma_fence_put(stub);
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
@@ -325,6 +424,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
"Called with userptr BO"))
return -EINVAL;
+ /* bo has been pinned, not need validate it */
+ if (bo->tbo.pin_count)
+ return 0;
+
amdgpu_bo_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -337,11 +440,35 @@ validate_fail:
return ret;
}
-static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
{
- struct amdgpu_vm_parser *p = param;
+ int ret = amdgpu_bo_reserve(bo, false);
+
+ if (ret)
+ return ret;
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+ if (ret)
+ goto unreserve_out;
- return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
+ ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+ if (ret)
+ goto unreserve_out;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+unreserve_out:
+ amdgpu_bo_unreserve(bo);
+
+ return ret;
+}
+
+static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
+{
+ return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
}
/* vm_validate_pt_pd_bos - Validate page table and directory BOs
@@ -351,45 +478,28 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
* again. Page directories are only updated after updating page
* tables.
*/
-static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
{
- struct amdgpu_bo *pd = vm->root.base.bo;
+ struct amdgpu_bo *pd = vm->root.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
- struct amdgpu_vm_parser param;
int ret;
- param.domain = AMDGPU_GEM_DOMAIN_VRAM;
- param.wait = false;
-
- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
- &param);
+ ret = amdgpu_vm_validate(adev, vm, ticket,
+ amdgpu_amdkfd_validate_vm_bo, NULL);
if (ret) {
pr_err("failed to validate PT BOs\n");
return ret;
}
- ret = amdgpu_amdkfd_validate(&param, pd);
- if (ret) {
- pr_err("failed to validate PD\n");
- return ret;
- }
-
- vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
-
- if (vm->use_cpu_for_update) {
- ret = amdgpu_bo_kmap(pd, NULL);
- if (ret) {
- pr_err("failed to kmap PD, ret=%d\n", ret);
- return ret;
- }
- }
+ vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
return 0;
}
static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
- struct amdgpu_bo *pd = vm->root.base.bo;
+ struct amdgpu_bo *pd = vm->root.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
@@ -397,151 +507,535 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
- return amdgpu_sync_fence(sync, vm->last_update);
+ return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
}
-static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct kgd_mem *mem)
{
- struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
- bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
- uint32_t mapping_flags;
+ uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
+ AMDGPU_VM_MTYPE_DEFAULT;
- mapping_flags = AMDGPU_VM_PAGE_READABLE;
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
- switch (adev->asic_type) {
- case CHIP_ARCTURUS:
- if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
- if (bo_adev == adev)
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
- else
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
- } else {
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- }
+ return mapping_flags;
+}
+
+/**
+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by input parameters. The address used to build is assumed
+ * to be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+ if (!sg)
+ return NULL;
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+ kfree(sg);
+ return NULL;
+ }
+ sg_dma_address(sg->sgl) = addr;
+ sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = size;
+#endif
+ return sg;
+}
+
+static int
+kfd_mem_dmamap_userptr(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ enum dma_data_direction direction =
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ int ret;
+
+ if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+ return -EINVAL;
+
+ ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
+ if (unlikely(!ttm->sg))
+ return -ENOMEM;
+
+ /* Same sequence as in amdgpu_ttm_tt_pin_userptr */
+ ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
+ ttm->num_pages, 0,
+ (u64)ttm->num_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (unlikely(ret))
+ goto free_sg;
+
+ ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+ if (unlikely(ret))
+ goto release_sg;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unmap_sg;
+
+ return 0;
+
+unmap_sg:
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+release_sg:
+ pr_err("DMA map userptr failed: %d\n", ret);
+ sg_free_table(ttm->sg);
+free_sg:
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ return ret;
+}
+
+static int
+kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/**
+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns DOORBELL does not require DMA mapping.
+ * This is because the request doesn't go through PCIe root complex i.e. it instead
+ * loops back. The need to DMA map arises only when accessing peer device's DOORBELL
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped without regard to
+ * device ownership. This is because access requests for MMIO go through PCIe root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ * in updating requesting device's page table
+ * - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ * accessible. This allows an update of requesting device's page table
+ * with entries associated with DOOREBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * - Mapping of DOORBELL or MMIO BO of same or peer device
+ * - Validating an evicted DOOREBELL or MMIO BO on device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+ dma_addr_t dma_addr;
+ bool mmio;
+ int ret;
+
+ /* Expect SG Table of dmapmap BO to be NULL */
+ mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+ if (unlikely(ttm->sg)) {
+ pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
+ return -EINVAL;
+ }
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+ pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+ pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+ dma_addr = dma_map_resource(adev->dev, dma_addr,
+ mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ ret = dma_mapping_error(adev->dev, dma_addr);
+ if (unlikely(ret))
+ return ret;
+ pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+ ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+ if (unlikely(!ttm->sg)) {
+ ret = -ENOMEM;
+ goto unmap_sg;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret))
+ goto free_sg;
+
+ return ret;
+
+free_sg:
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+unmap_sg:
+ dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+ dir, DMA_ATTR_SKIP_CPU_SYNC);
+ return ret;
+}
+
+static int
+kfd_mem_dmamap_attachment(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ switch (attachment->type) {
+ case KFD_MEM_ATT_SHARED:
+ return 0;
+ case KFD_MEM_ATT_USERPTR:
+ return kfd_mem_dmamap_userptr(mem, attachment);
+ case KFD_MEM_ATT_DMABUF:
+ return kfd_mem_dmamap_dmabuf(attachment);
+ case KFD_MEM_ATT_SG:
+ return kfd_mem_dmamap_sg_bo(mem, attachment);
+ default:
+ WARN_ON_ONCE(1);
+ }
+ return -EINVAL;
+}
+
+static void
+kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ enum dma_data_direction direction =
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ struct ttm_operation_ctx ctx = {.interruptible = false};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+
+ if (unlikely(!ttm->sg))
+ return;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+}
+
+static void
+kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+ /* This is a no-op. We don't want to trigger eviction fences when
+ * unmapping DMABufs. Therefore the invalidation (moving to system
+ * domain) is done in kfd_mem_dmamap_dmabuf.
+ */
+}
+
+/**
+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs following steps:
+ * - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ * - Free SG Table that is used to encapsulate DMA mapped memory of
+ * peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * UNMapping of DOORBELL or MMIO BO on a device having access to its memory
+ * Eviction of DOOREBELL or MMIO BO on device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+
+ if (unlikely(!ttm->sg)) {
+ pr_debug("SG Table of BO is NULL");
+ return;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
+ ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ bo->tbo.sg = NULL;
+}
+
+static void
+kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ switch (attachment->type) {
+ case KFD_MEM_ATT_SHARED:
+ break;
+ case KFD_MEM_ATT_USERPTR:
+ kfd_mem_dmaunmap_userptr(mem, attachment);
+ break;
+ case KFD_MEM_ATT_DMABUF:
+ kfd_mem_dmaunmap_dmabuf(attachment);
+ break;
+ case KFD_MEM_ATT_SG:
+ kfd_mem_dmaunmap_sg_bo(mem, attachment);
break;
default:
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ WARN_ON_ONCE(1);
}
+}
+
+static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
+{
+ if (!mem->dmabuf) {
+ struct amdgpu_device *bo_adev;
+ struct dma_buf *dmabuf;
- return amdgpu_gem_va_map_flags(adev, mapping_flags);
+ bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file,
+ mem->gem_handle,
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DRM_RDWR : 0);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+ mem->dmabuf = dmabuf;
+ }
+
+ return 0;
}
-/* add_bo_to_vm - Add a BO to a VM
+static int
+kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
+ struct amdgpu_bo **bo)
+{
+ struct drm_gem_object *gobj;
+ int ret;
+
+ ret = kfd_mem_export_dmabuf(mem);
+ if (ret)
+ return ret;
+
+ gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
+ if (IS_ERR(gobj))
+ return PTR_ERR(gobj);
+
+ *bo = gem_to_amdgpu_bo(gobj);
+ (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+ return 0;
+}
+
+/* kfd_mem_attach - Add a BO to a VM
*
* Everything that needs to bo done only once when a BO is first added
* to a VM. It can later be mapped and unmapped many times without
* repeating these steps.
*
+ * 0. Create BO for DMA mapping, if needed
* 1. Allocate and initialize BO VA entry data structure
* 2. Add BO to the VM
* 3. Determine ASIC-specific PTE flags
* 4. Alloc page tables and directories if needed
* 4a. Validate new page tables and directories
*/
-static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
- struct amdgpu_vm *vm, bool is_aql,
- struct kfd_bo_va_list **p_bo_va_entry)
+static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
+ struct amdgpu_vm *vm, bool is_aql)
{
- int ret;
- struct kfd_bo_va_list *bo_va_entry;
- struct amdgpu_bo *bo = mem->bo;
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ unsigned long bo_size = mem->bo->tbo.base.size;
uint64_t va = mem->va;
- struct list_head *list_bo_va = &mem->bo_va_list;
- unsigned long bo_size = bo->tbo.base.size;
+ struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
+ struct amdgpu_bo *bo[2] = {NULL, NULL};
+ struct amdgpu_bo_va *bo_va;
+ bool same_hive = false;
+ int i, ret;
if (!va) {
pr_err("Invalid VA when adding BO to VM\n");
return -EINVAL;
}
- if (is_aql)
- va += bo_size;
-
- bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
- if (!bo_va_entry)
- return -ENOMEM;
+ /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
+ *
+ * The access path of MMIO and DOORBELL BOs of is always over PCIe.
+ * In contrast the access path of VRAM BOs depens upon the type of
+ * link that connects the peer device. Access over PCIe is allowed
+ * if peer device has large BAR. In contrast, access over xGMI is
+ * allowed for both small and large BAR configurations of peer device
+ */
+ if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
+ ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
+ same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
+ if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
+ return -EINVAL;
+ }
- pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
- va + bo_size, vm);
+ for (i = 0; i <= is_aql; i++) {
+ attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
+ if (unlikely(!attachment[i])) {
+ ret = -ENOMEM;
+ goto unwind;
+ }
- /* Add BO to VM internal data structures*/
- bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
- if (!bo_va_entry->bo_va) {
- ret = -EINVAL;
- pr_err("Failed to add BO object to VM. ret == %d\n",
- ret);
- goto err_vmadd;
- }
+ pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+ va + bo_size, vm);
- bo_va_entry->va = va;
- bo_va_entry->pte_flags = get_pte_flags(adev, mem);
- bo_va_entry->kgd_dev = (void *)adev;
- list_add(&bo_va_entry->bo_list, list_bo_va);
+ if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
+ (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
+ (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) ||
+ same_hive) {
+ /* Mappings on the local GPU, or VRAM mappings in the
+ * local hive, or userptr, or GTT mapping can reuse dma map
+ * address space share the original BO
+ */
+ attachment[i]->type = KFD_MEM_ATT_SHARED;
+ bo[i] = mem->bo;
+ drm_gem_object_get(&bo[i]->tbo.base);
+ } else if (i > 0) {
+ /* Multiple mappings on the same GPU share the BO */
+ attachment[i]->type = KFD_MEM_ATT_SHARED;
+ bo[i] = bo[0];
+ drm_gem_object_get(&bo[i]->tbo.base);
+ } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+ /* Create an SG BO to DMA-map userptrs on other GPUs */
+ attachment[i]->type = KFD_MEM_ATT_USERPTR;
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
+ } else if (mem->bo->tbo.type == ttm_bo_type_sg) {
+ WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
+ "Handing invalid SG BO in ATTACH request");
+ attachment[i]->type = KFD_MEM_ATT_SG;
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ /* Enable acces to GTT and VRAM BOs of peer devices */
+ } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
+ mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ attachment[i]->type = KFD_MEM_ATT_DMABUF;
+ ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
+ } else {
+ WARN_ONCE(true, "Handling invalid ATTACH request");
+ ret = -EINVAL;
+ goto unwind;
+ }
- if (p_bo_va_entry)
- *p_bo_va_entry = bo_va_entry;
+ /* Add BO to VM internal data structures */
+ ret = amdgpu_bo_reserve(bo[i], false);
+ if (ret) {
+ pr_debug("Unable to reserve BO during memory attach");
+ goto unwind;
+ }
+ bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+ if (!bo_va)
+ bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ else
+ ++bo_va->ref_count;
+ attachment[i]->bo_va = bo_va;
+ amdgpu_bo_unreserve(bo[i]);
+ if (unlikely(!attachment[i]->bo_va)) {
+ ret = -ENOMEM;
+ pr_err("Failed to add BO object to VM. ret == %d\n",
+ ret);
+ goto unwind;
+ }
+ attachment[i]->va = va;
+ attachment[i]->pte_flags = get_pte_flags(adev, vm, mem);
+ attachment[i]->adev = adev;
+ list_add(&attachment[i]->list, &mem->attachments);
- /* Allocate validate page tables if needed */
- ret = vm_validate_pt_pd_bos(vm);
- if (ret) {
- pr_err("validate_pt_pd_bos() failed\n");
- goto err_alloc_pts;
+ va += bo_size;
}
return 0;
-err_alloc_pts:
- amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
- list_del(&bo_va_entry->bo_list);
-err_vmadd:
- kfree(bo_va_entry);
+unwind:
+ for (; i >= 0; i--) {
+ if (!attachment[i])
+ continue;
+ if (attachment[i]->bo_va) {
+ (void)amdgpu_bo_reserve(bo[i], true);
+ if (--attachment[i]->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+ amdgpu_bo_unreserve(bo[i]);
+ list_del(&attachment[i]->list);
+ }
+ if (bo[i])
+ drm_gem_object_put(&bo[i]->tbo.base);
+ kfree(attachment[i]);
+ }
return ret;
}
-static void remove_bo_from_vm(struct amdgpu_device *adev,
- struct kfd_bo_va_list *entry, unsigned long size)
+static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
{
- pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
- entry->va,
- entry->va + size, entry);
- amdgpu_vm_bo_rmv(adev, entry->bo_va);
- list_del(&entry->bo_list);
- kfree(entry);
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+ pr_debug("\t remove VA 0x%llx in entry %p\n",
+ attachment->va, attachment);
+ if (--attachment->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
+ drm_gem_object_put(&bo->tbo.base);
+ list_del(&attachment->list);
+ kfree(attachment);
}
static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info,
bool userptr)
{
- struct ttm_validate_buffer *entry = &mem->validate_list;
- struct amdgpu_bo *bo = mem->bo;
-
- INIT_LIST_HEAD(&entry->head);
- entry->num_shared = 1;
- entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
if (userptr)
- list_add_tail(&entry->head, &process_info->userptr_valid_list);
+ list_add_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
else
- list_add_tail(&entry->head, &process_info->kfd_bo_list);
+ list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
}
static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
{
- struct ttm_validate_buffer *bo_list_entry;
-
- bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
- list_del(&bo_list_entry->head);
+ list_del(&mem->validate_list);
mutex_unlock(&process_info->lock);
}
@@ -557,11 +1051,13 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
+ bool criu_resume)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_hmm_range *range;
int ret = 0;
mutex_lock(&process_info->lock);
@@ -572,16 +1068,40 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
goto out;
}
- ret = amdgpu_mn_register(bo, user_addr);
+ ret = amdgpu_hmm_register(bo, user_addr);
if (ret) {
pr_err("%s: Failed to register MMU notifier: %d\n",
__func__, ret);
goto out;
}
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+ if (criu_resume) {
+ /*
+ * During a CRIU restore operation, the userptr buffer objects
+ * will be validated in the restore_userptr_work worker at a
+ * later stage when it is scheduled by another ioctl called by
+ * CRIU master process for the target pid for restore.
+ */
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
+ mutex_unlock(&process_info->lock);
+ return 0;
+ }
+
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range)) {
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
if (ret) {
- pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ amdgpu_hmm_range_free(range);
+ if (ret == -EAGAIN)
+ pr_debug("Failed to get user pages, try again\n");
+ else
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto unregister_out;
}
@@ -590,6 +1110,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
pr_err("%s: Failed to reserve BO\n", __func__);
goto release_out;
}
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
@@ -597,10 +1120,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
amdgpu_bo_unreserve(bo);
release_out:
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ amdgpu_hmm_range_free(range);
unregister_out:
if (ret)
- amdgpu_mn_unregister(bo);
+ amdgpu_hmm_unregister(bo);
out:
mutex_unlock(&process_info->lock);
return ret;
@@ -612,13 +1135,12 @@ out:
* object can track VM updates.
*/
struct bo_vm_reservation_context {
- struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
- unsigned int n_vms; /* Number of VMs reserved */
- struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */
- struct ww_acquire_ctx ticket; /* Reservation ticket */
- struct list_head list, duplicates; /* BO lists */
- struct amdgpu_sync *sync; /* Pointer to sync object */
- bool reserved; /* Whether BOs are reserved */
+ /* DRM execution context for the reservation */
+ struct drm_exec exec;
+ /* Number of VMs reserved */
+ unsigned int n_vms;
+ /* Pointer to sync object */
+ struct amdgpu_sync *sync;
};
enum bo_vm_match {
@@ -642,35 +1164,26 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
WARN_ON(!vm);
- ctx->reserved = false;
ctx->n_vms = 1;
ctx->sync = &mem->sync;
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
- INIT_LIST_HEAD(&ctx->list);
- INIT_LIST_HEAD(&ctx->duplicates);
-
- ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
- if (!ctx->vm_pd)
- return -ENOMEM;
-
- ctx->kfd_bo.priority = 0;
- ctx->kfd_bo.tv.bo = &bo->tbo;
- ctx->kfd_bo.tv.num_shared = 1;
- list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
- amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
-
- ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates);
- if (ret) {
- pr_err("Failed to reserve buffers in ttm.\n");
- kfree(ctx->vm_pd);
- ctx->vm_pd = NULL;
- return ret;
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
}
-
- ctx->reserved = true;
return 0;
+
+error:
+ pr_err("Failed to reserve buffers in ttm.\n");
+ drm_exec_fini(&ctx->exec);
+ return ret;
}
/**
@@ -687,63 +1200,40 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
struct amdgpu_vm *vm, enum bo_vm_match map_type,
struct bo_vm_reservation_context *ctx)
{
+ struct kfd_mem_attachment *entry;
struct amdgpu_bo *bo = mem->bo;
- struct kfd_bo_va_list *entry;
- unsigned int i;
int ret;
- ctx->reserved = false;
- ctx->n_vms = 0;
- ctx->vm_pd = NULL;
ctx->sync = &mem->sync;
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ ctx->n_vms = 0;
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if ((vm && vm != entry->bo_va->base.vm) ||
+ (entry->is_mapped != map_type
+ && map_type != BO_VM_ALL))
+ continue;
- INIT_LIST_HEAD(&ctx->list);
- INIT_LIST_HEAD(&ctx->duplicates);
-
- list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
- if ((vm && vm != entry->bo_va->base.vm) ||
- (entry->is_mapped != map_type
- && map_type != BO_VM_ALL))
- continue;
-
- ctx->n_vms++;
- }
-
- if (ctx->n_vms != 0) {
- ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
- GFP_KERNEL);
- if (!ctx->vm_pd)
- return -ENOMEM;
- }
-
- ctx->kfd_bo.priority = 0;
- ctx->kfd_bo.tv.bo = &bo->tbo;
- ctx->kfd_bo.tv.num_shared = 1;
- list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
- i = 0;
- list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
- if ((vm && vm != entry->bo_va->base.vm) ||
- (entry->is_mapped != map_type
- && map_type != BO_VM_ALL))
- continue;
-
- amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
- &ctx->vm_pd[i]);
- i++;
- }
+ ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
+ &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+ ++ctx->n_vms;
+ }
- ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates);
- if (ret) {
- pr_err("Failed to reserve buffers in ttm.\n");
- kfree(ctx->vm_pd);
- ctx->vm_pd = NULL;
- return ret;
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
}
-
- ctx->reserved = true;
return 0;
+
+error:
+ pr_err("Failed to reserve buffers in ttm.\n");
+ drm_exec_fini(&ctx->exec);
+ return ret;
}
/**
@@ -764,40 +1254,48 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
if (wait)
ret = amdgpu_sync_wait(ctx->sync, intr);
- if (ctx->reserved)
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
- kfree(ctx->vm_pd);
-
+ drm_exec_fini(&ctx->exec);
ctx->sync = NULL;
-
- ctx->reserved = false;
- ctx->vm_pd = NULL;
-
return ret;
}
-static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
- struct kfd_bo_va_list *entry,
+static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync)
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
+ struct amdgpu_device *adev = entry->adev;
struct amdgpu_vm *vm = bo_va->base.vm;
- amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+ if (bo_va->queue_refcount) {
+ pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount);
+ return -EBUSY;
+ }
- amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+ (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
- amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ /* VM entity stopped if process killed, don't clear freed pt bo */
+ if (!amdgpu_vm_ready(vm))
+ return 0;
+
+ (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+
+ (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
return 0;
}
-static int update_gpuvm_pte(struct amdgpu_device *adev,
- struct kfd_bo_va_list *entry,
- struct amdgpu_sync *sync)
+static int update_gpuvm_pte(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
+ struct amdgpu_sync *sync)
{
- int ret;
struct amdgpu_bo_va *bo_va = entry->bo_va;
+ struct amdgpu_device *adev = entry->adev;
+ int ret;
+
+ ret = kfd_mem_dmamap_attachment(mem, entry);
+ if (ret)
+ return ret;
/* Update the page tables */
ret = amdgpu_vm_bo_update(adev, bo_va, false);
@@ -806,17 +1304,18 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
return ret;
}
- return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
}
-static int map_bo_to_gpuvm(struct amdgpu_device *adev,
- struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
- bool no_update_pte)
+static int map_bo_to_gpuvm(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
+ struct amdgpu_sync *sync,
+ bool no_update_pte)
{
int ret;
/* Set virtual address for the allocation */
- ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
+ ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0,
amdgpu_bo_size(entry->bo_va->base.bo),
entry->pte_flags);
if (ret) {
@@ -828,7 +1327,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
if (no_update_pte)
return 0;
- ret = update_gpuvm_pte(adev, entry, sync);
+ ret = update_gpuvm_pte(mem, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
goto update_gpuvm_pte_failed;
@@ -837,36 +1336,20 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
return 0;
update_gpuvm_pte_failed:
- unmap_bo_from_gpuvm(adev, entry, sync);
+ unmap_bo_from_gpuvm(mem, entry, sync);
+ kfd_mem_dmaunmap_attachment(mem, entry);
return ret;
}
-static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
-{
- struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-
- if (!sg)
- return NULL;
- if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
- kfree(sg);
- return NULL;
- }
- sg->sgl->dma_address = addr;
- sg->sgl->length = size;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->sgl->dma_length = size;
-#endif
- return sg;
-}
-
-static int process_validate_vms(struct amdkfd_process_info *process_info)
+static int process_validate_vms(struct amdkfd_process_info *process_info,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_vm *peer_vm;
int ret;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
- ret = vm_validate_pt_pd_bos(peer_vm);
+ ret = vm_validate_pt_pd_bos(peer_vm, ticket);
if (ret)
return ret;
}
@@ -882,7 +1365,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
- struct amdgpu_bo *pd = peer_vm->root.base.bo;
+ struct amdgpu_bo *pd = peer_vm->root.bo;
ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
AMDGPU_SYNC_NE_OWNER,
@@ -922,6 +1405,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return -ENOMEM;
mutex_init(&info->lock);
+ mutex_init(&info->notifier_lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
INIT_LIST_HEAD(&info->userptr_valid_list);
@@ -929,7 +1413,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
- current->mm);
+ current->mm,
+ NULL);
if (!info->eviction_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
@@ -937,41 +1422,42 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
}
info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
- atomic_set(&info->evicted_bos, 0);
INIT_DELAYED_WORK(&info->restore_userptr_work,
amdgpu_amdkfd_restore_userptr_worker);
*process_info = info;
- *ef = dma_fence_get(&info->eviction_fence->base);
}
vm->process_info = *process_info;
/* Validate page directory and attach eviction fence */
- ret = amdgpu_bo_reserve(vm->root.base.bo, true);
+ ret = amdgpu_bo_reserve(vm->root.bo, true);
if (ret)
goto reserve_pd_fail;
- ret = vm_validate_pt_pd_bos(vm);
+ ret = vm_validate_pt_pd_bos(vm, NULL);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
goto validate_pd_fail;
}
- ret = amdgpu_bo_sync_wait(vm->root.base.bo,
+ ret = amdgpu_bo_sync_wait(vm->root.bo,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
- ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
+ ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
- amdgpu_bo_fence(vm->root.base.bo,
- &vm->process_info->eviction_fence->base, true);
- amdgpu_bo_unreserve(vm->root.base.bo);
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv,
+ &vm->process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ amdgpu_bo_unreserve(vm->root.bo);
/* Update process info */
mutex_lock(&vm->process_info->lock);
list_add_tail(&vm->vm_list_node,
&(vm->process_info->vm_list_head));
vm->process_info->n_vms++;
+ if (ef)
+ *ef = dma_fence_get(&vm->process_info->eviction_fence->base);
mutex_unlock(&vm->process_info->lock);
return 0;
@@ -979,67 +1465,92 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
- amdgpu_bo_unreserve(vm->root.base.bo);
+ amdgpu_bo_unreserve(vm->root.bo);
reserve_pd_fail:
vm->process_info = NULL;
if (info) {
- /* Two fence references: one in info and one in *ef */
dma_fence_put(&info->eviction_fence->base);
- dma_fence_put(*ef);
- *ef = NULL;
*process_info = NULL;
put_pid(info->pid);
create_evict_fence_fail:
mutex_destroy(&info->lock);
+ mutex_destroy(&info->notifier_lock);
kfree(info);
}
return ret;
}
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
- void **vm, void **process_info,
- struct dma_fence **ef)
+/**
+ * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria
+ * @bo: Handle of buffer object being pinned
+ * @domain: Domain into which BO should be pinned
+ *
+ * - USERPTR BOs are UNPINNABLE and will return error
+ * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count incremented. It is valid to PIN a BO multiple times
+ *
+ * Return: ZERO if successful in pinning, Non-Zero in case of error.
+ */
+static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *new_vm;
- int ret;
+ int ret = 0;
- new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
- if (!new_vm)
- return -ENOMEM;
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret))
+ return ret;
- /* Initialize AMDGPU part of the VM */
- ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
- if (ret) {
- pr_err("Failed init vm ret %d\n", ret);
- goto amdgpu_vm_init_fail;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ /*
+ * If bo is not contiguous on VRAM, move to system memory first to ensure
+ * we can get contiguous VRAM space after evicting other BOs.
+ */
+ if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+ struct ttm_operation_ctx ctx = { true, false };
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret)) {
+ pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret);
+ goto out;
+ }
+ }
}
- /* Initialize KFD part of the VM and process info */
- ret = init_kfd_vm(new_vm, process_info, ef);
+ ret = amdgpu_bo_pin(bo, domain);
if (ret)
- goto init_kfd_vm_fail;
+ pr_err("Error in Pinning BO to domain: %d\n", domain);
- *vm = (void *) new_vm;
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+out:
+ amdgpu_bo_unreserve(bo);
+ return ret;
+}
- return 0;
+/**
+ * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria
+ * @bo: Handle of buffer object being unpinned
+ *
+ * - Is a illegal request for USERPTR BOs and is ignored
+ * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count decremented. Calls to UNPIN must balance calls to PIN
+ */
+static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
+{
+ int ret = 0;
-init_kfd_vm_fail:
- amdgpu_vm_fini(adev, new_vm);
-amdgpu_vm_init_fail:
- kfree(new_vm);
- return ret;
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret))
+ return;
+
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
}
-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp, u32 pasid,
- void **vm, void **process_info,
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm,
+ void **process_info,
struct dma_fence **ef)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct drm_file *drm_priv = filp->private_data;
- struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
- struct amdgpu_vm *avm = &drv_priv->vm;
int ret;
/* Already a compute VM? */
@@ -1047,7 +1558,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
return -EINVAL;
/* Convert VM into a compute VM */
- ret = amdgpu_vm_make_compute(adev, avm, pasid);
+ ret = amdgpu_vm_make_compute(adev, avm);
if (ret)
return ret;
@@ -1056,7 +1567,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
if (ret)
return ret;
- *vm = (void *)avm;
+ amdgpu_vm_set_task_info(avm);
return 0;
}
@@ -1065,16 +1576,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct amdkfd_process_info *process_info = vm->process_info;
- struct amdgpu_bo *pd = vm->root.base.bo;
if (!process_info)
return;
- /* Release eviction fence from PD */
- amdgpu_bo_reserve(pd, false);
- amdgpu_bo_fence(pd, NULL, false);
- amdgpu_bo_unreserve(pd);
-
/* Update process info */
mutex_lock(&process_info->lock);
process_info->n_vms--;
@@ -1093,68 +1598,114 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
cancel_delayed_work_sync(&process_info->restore_userptr_work);
put_pid(process_info->pid);
mutex_destroy(&process_info->lock);
+ mutex_destroy(&process_info->notifier_lock);
kfree(process_info);
}
}
-void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
-
- if (WARN_ON(!kgd || !vm))
- return;
-
- pr_debug("Destroying process vm %p\n", vm);
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_bo *pd = avm->root.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
- /* Release the VM context */
- amdgpu_vm_fini(adev, avm);
- kfree(vm);
+ if (adev->asic_type < CHIP_VEGA10)
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ return avm->pd_phys_addr;
}
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
+void amdgpu_amdkfd_block_mmu_notifications(void *p)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
- if (WARN_ON(!kgd || !vm))
- return;
+ mutex_lock(&pinfo->lock);
+ WRITE_ONCE(pinfo->block_mmu_notifications, true);
+ mutex_unlock(&pinfo->lock);
+}
- pr_debug("Releasing process vm %p\n", vm);
+int amdgpu_amdkfd_criu_resume(void *p)
+{
+ int ret = 0;
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ pr_debug("scheduling work\n");
+ mutex_lock(&pinfo->notifier_lock);
+ pinfo->evicted_bos++;
+ mutex_unlock(&pinfo->notifier_lock);
+ if (!READ_ONCE(pinfo->block_mmu_notifications)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ WRITE_ONCE(pinfo->block_mmu_notifications, false);
+ queue_delayed_work(system_freezable_wq,
+ &pinfo->restore_userptr_work, 0);
- /* The original pasid of amdgpu vm has already been
- * released during making a amdgpu vm to a compute vm
- * The current pasid is managed by kfd and will be
- * released on kfd process destroy. Set amdgpu pasid
- * to 0 to avoid duplicate release.
- */
- amdgpu_vm_release_compute(adev, avm);
+out_unlock:
+ mutex_unlock(&pinfo->lock);
+ return ret;
}
-uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id)
{
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
- struct amdgpu_bo *pd = avm->root.base.bo;
- struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
+ ssize_t available;
+ uint64_t vram_available, system_mem_available, ttm_mem_available;
- if (adev->asic_type < CHIP_VEGA10)
- return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
- return avm->pd_phys_addr;
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id];
+ else
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id]
+ - atomic64_read(&adev->vram_pin_size)
+ - reserved_for_pt
+ - reserved_for_ras;
+
+ if (adev->apu_prefer_gtt) {
+ system_mem_available = no_system_mem_limit ?
+ kfd_mem_limit.max_system_mem_limit :
+ kfd_mem_limit.max_system_mem_limit -
+ kfd_mem_limit.system_mem_used;
+
+ ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
+ kfd_mem_limit.ttm_mem_used;
+
+ available = min3(system_mem_available, ttm_mem_available,
+ vram_available);
+ available = ALIGN_DOWN(available, PAGE_SIZE);
+ } else {
+ available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
+ }
+
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ if (available < 0)
+ available = 0;
+
+ return available;
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- struct kgd_dev *kgd, uint64_t va, uint64_t size,
- void *vm, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags)
+ struct amdgpu_device *adev, uint64_t va, uint64_t size,
+ void *drm_priv, struct kgd_mem **mem,
+ uint64_t *offset, uint32_t flags, bool criu_resume)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL;
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
- struct drm_gem_object *gobj;
+ struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
+ uint64_t aligned_size;
+ int8_t xcp_id = -1;
u64 alloc_flags;
int ret;
@@ -1163,41 +1714,60 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
*/
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
- alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+
+ if (adev->apu_prefer_gtt) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else {
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+
+ /* For contiguous VRAM allocation */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
+ alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ }
+ xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
+ 0 : fpriv->xcp_id;
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
- } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
- domain = AMDGPU_GEM_DOMAIN_GTT;
- alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
- alloc_flags = 0;
- if (!offset || !*offset)
- return -EINVAL;
- user_addr = untagged_addr(*offset);
- } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
- KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ } else {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
- bo_type = ttm_bo_type_sg;
- alloc_flags = 0;
- if (size > UINT_MAX)
+ alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ if (!offset || !*offset)
+ return -EINVAL;
+ user_addr = untagged_addr(*offset);
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ bo_type = ttm_bo_type_sg;
+ if (size > UINT_MAX)
+ return -EINVAL;
+ sg = create_sg_table(*offset, size);
+ if (!sg)
+ return -ENOMEM;
+ } else {
return -EINVAL;
- sg = create_doorbell_sg(*offset, size);
- if (!sg)
- return -ENOMEM;
- } else {
- return -EINVAL;
+ }
}
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
+ alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
+
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem) {
ret = -ENOMEM;
goto err;
}
- INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ INIT_LIST_HEAD(&(*mem)->attachments);
mutex_init(&(*mem)->lock);
(*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
@@ -1206,28 +1776,39 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* the memory.
*/
if ((*mem)->aql_queue)
- size = size >> 1;
+ size >>= 1;
+ aligned_size = PAGE_ALIGN(size);
(*mem)->alloc_flags = flags;
amdgpu_sync_create(&(*mem)->sync);
- ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
+ xcp_id);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
}
- pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
- va, size, domain_string(alloc_domain));
+ pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
+ va, (*mem)->aql_queue ? size << 1 : size,
+ domain_string(alloc_domain), xcp_id);
- ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
- bo_type, NULL, &gobj);
+ ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
+ bo_type, NULL, &gobj, xcp_id + 1);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
goto err_bo_create;
}
+ ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
+ if (ret) {
+ pr_debug("Failed to allow vma node access. ret %d\n", ret);
+ goto err_node_allow;
+ }
+ ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
+ if (ret)
+ goto err_gem_handle_create;
bo = gem_to_amdgpu_bo(gobj);
if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
@@ -1236,18 +1817,38 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
- bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
+ bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
(*mem)->va = va;
(*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
+
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, user_addr);
+ pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
+ ret = init_user_pages(*mem, user_addr, criu_resume);
if (ret)
goto allocate_init_user_pages_failed;
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
+ goto err_pin_bo;
+ }
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ } else {
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_validate_bo;
}
if (offset)
@@ -1256,15 +1857,24 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
return 0;
allocate_init_user_pages_failed:
+err_pin_bo:
+err_validate_bo:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
- amdgpu_bo_unref(&bo);
+ drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle);
+err_gem_handle_create:
+ drm_vma_node_revoke(&gobj->vma_node, drm_priv);
+err_node_allow:
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
- unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
+ amdgpu_sync_free(&(*mem)->sync);
mutex_destroy(&(*mem)->lock);
- kfree(*mem);
+ if (gobj)
+ drm_gem_object_put(gobj);
+ else
+ kfree(*mem);
err:
if (sg) {
sg_free_table(sg);
@@ -1274,18 +1884,27 @@ err:
}
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
+ uint64_t *size)
{
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
- struct kfd_bo_va_list *entry, *tmp;
+ bool use_release_notifier = (mem->bo->kfd_bo == mem);
+ struct kfd_mem_attachment *entry, *tmp;
struct bo_vm_reservation_context ctx;
- struct ttm_validate_buffer *bo_list_entry;
unsigned int mapped_to_gpu_memory;
int ret;
bool is_imported = false;
mutex_lock(&mem->lock);
+
+ /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
+ if (mem->alloc_flags &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+ }
+
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(&mem->lock);
@@ -1300,31 +1919,32 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Make sure restore workers don't access the BO any more */
- bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
- list_del(&bo_list_entry->head);
+ list_del(&mem->validate_list);
mutex_unlock(&process_info->lock);
- /* No more MMU notifiers */
- amdgpu_mn_unregister(mem->bo);
+ /* Cleanup user pages and MMU notifiers */
+ if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+ amdgpu_hmm_unregister(mem->bo);
+ mutex_lock(&process_info->notifier_lock);
+ amdgpu_hmm_range_free(mem->range);
+ mutex_unlock(&process_info->notifier_lock);
+ }
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
- /* The eviction fence should be removed by the last unmap.
- * TODO: Log an error condition if the bo still has the eviction fence
- * attached
- */
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
/* Remove from VM internal data structures */
- list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
- remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
- entry, bo_size);
+ list_for_each_entry_safe(entry, tmp, &mem->attachments, list) {
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ kfd_mem_detach(entry);
+ }
ret = unreserve_bo_and_vms(&ctx, false, false);
@@ -1340,36 +1960,52 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Update the size of the BO being freed if it was allocated from
- * VRAM and is not imported.
+ * VRAM and is not imported. For APP APU VRAM allocations are done
+ * in GTT domain
*/
if (size) {
- if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
- (!is_imported))
+ if (!is_imported &&
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
*size = bo_size;
else
*size = 0;
}
/* Free the BO*/
- drm_gem_object_put(&mem->bo->tbo.base);
+ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+ drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+ if (mem->dmabuf) {
+ dma_buf_put(mem->dmabuf);
+ mem->dmabuf = NULL;
+ }
mutex_destroy(&mem->lock);
- kfree(mem);
+
+ /* If this releases the last reference, it will end up calling
+ * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
+ * this needs to be the last call here.
+ */
+ drm_gem_object_put(&mem->bo->tbo.base);
+
+ /*
+ * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(),
+ * explicitly free it here.
+ */
+ if (!use_release_notifier)
+ kfree(mem);
return ret;
}
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+ struct amdgpu_device *adev, struct kgd_mem *mem,
+ void *drm_priv)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
int ret;
struct amdgpu_bo *bo;
uint32_t domain;
- struct kfd_bo_va_list *entry;
+ struct kfd_mem_attachment *entry;
struct bo_vm_reservation_context ctx;
- struct kfd_bo_va_list *bo_va_entry = NULL;
- struct kfd_bo_va_list *bo_va_entry_aql = NULL;
unsigned long bo_size;
bool is_invalid_userptr = false;
@@ -1385,14 +2021,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
*/
mutex_lock(&mem->process_info->lock);
- /* Lock mmap-sem. If we find an invalid userptr BO, we can be
+ /* Lock notifier lock. If we find an invalid userptr BO, we can be
* sure that the MMU notifier is no longer running
* concurrently and the queues are actually stopped
*/
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- mmap_write_lock(current->mm);
- is_invalid_userptr = atomic_read(&mem->invalid);
- mmap_write_unlock(current->mm);
+ mutex_lock(&mem->process_info->notifier_lock);
+ is_invalid_userptr = !!mem->invalid;
+ mutex_unlock(&mem->process_info->notifier_lock);
}
mutex_lock(&mem->lock);
@@ -1403,9 +2039,15 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
- vm, domain_string(domain));
+ avm, domain_string(domain));
+
+ if (!kfd_mem_is_attached(avm, mem)) {
+ ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
+ if (ret)
+ goto out;
+ }
- ret = reserve_bo_and_vm(mem, vm, &ctx);
+ ret = reserve_bo_and_vm(mem, avm, &ctx);
if (unlikely(ret))
goto out;
@@ -1415,80 +2057,44 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
* the next restore worker
*/
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
- bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+ bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
- if (check_if_add_bo_to_vm(avm, mem)) {
- ret = add_bo_to_vm(adev, mem, avm, false,
- &bo_va_entry);
- if (ret)
- goto add_bo_to_vm_failed;
- if (mem->aql_queue) {
- ret = add_bo_to_vm(adev, mem, avm,
- true, &bo_va_entry_aql);
- if (ret)
- goto add_bo_to_vm_failed_aql;
- }
- } else {
- ret = vm_validate_pt_pd_bos(avm);
- if (unlikely(ret))
- goto add_bo_to_vm_failed;
- }
-
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- /* Validate BO only once. The eviction fence gets added to BO
- * the first time it is mapped. Validate will wait for all
- * background evictions to complete.
- */
- ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
- if (ret) {
- pr_debug("Validate failed\n");
- goto map_bo_to_gpuvm_failed;
- }
- }
+ ret = vm_validate_pt_pd_bos(avm, NULL);
+ if (unlikely(ret))
+ goto out_unreserve;
- list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
- if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
- pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
- entry->va, entry->va + bo_size,
- entry);
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != avm || entry->is_mapped)
+ continue;
- ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
- is_invalid_userptr);
- if (ret) {
- pr_err("Failed to map bo to gpuvm\n");
- goto map_bo_to_gpuvm_failed;
- }
+ pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
+ entry->va, entry->va + bo_size, entry);
- ret = vm_update_pds(vm, ctx.sync);
- if (ret) {
- pr_err("Failed to update page directories\n");
- goto map_bo_to_gpuvm_failed;
- }
+ ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
+ is_invalid_userptr);
+ if (ret) {
+ pr_err("Failed to map bo to gpuvm\n");
+ goto out_unreserve;
+ }
- entry->is_mapped = true;
- mem->mapped_to_gpu_memory++;
- pr_debug("\t INC mapping count %d\n",
- mem->mapped_to_gpu_memory);
+ ret = vm_update_pds(avm, ctx.sync);
+ if (ret) {
+ pr_err("Failed to update page directories\n");
+ goto out_unreserve;
}
+
+ entry->is_mapped = true;
+ mem->mapped_to_gpu_memory++;
+ pr_debug("\t INC mapping count %d\n",
+ mem->mapped_to_gpu_memory);
}
- if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
- amdgpu_bo_fence(bo,
- &avm->process_info->eviction_fence->base,
- true);
ret = unreserve_bo_and_vms(&ctx, false, false);
goto out;
-map_bo_to_gpuvm_failed:
- if (bo_va_entry_aql)
- remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
-add_bo_to_vm_failed_aql:
- if (bo_va_entry)
- remove_bo_from_vm(adev, bo_va_entry, bo_size);
-add_bo_to_vm_failed:
+out_unreserve:
unreserve_bo_and_vms(&ctx, false, false);
out:
mutex_unlock(&mem->process_info->lock);
@@ -1496,20 +2102,49 @@ out:
return ret;
}
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+{
+ struct kfd_mem_attachment *entry;
+ struct amdgpu_vm *vm;
+ int ret;
+
+ vm = drm_priv_to_vm(drm_priv);
+
+ mutex_lock(&mem->lock);
+
+ ret = amdgpu_bo_reserve(mem->bo, true);
+ if (ret)
+ goto out;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != vm)
+ continue;
+ if (entry->bo_va->base.bo->tbo.ttm &&
+ !entry->bo_va->base.bo->tbo.ttm->sg)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ }
+
+ amdgpu_bo_unreserve(mem->bo);
+out:
+ mutex_unlock(&mem->lock);
+
+ return ret;
+}
+
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdkfd_process_info *process_info =
- ((struct amdgpu_vm *)vm)->process_info;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
unsigned long bo_size = mem->bo->tbo.base.size;
- struct kfd_bo_va_list *entry;
+ struct kfd_mem_attachment *entry;
struct bo_vm_reservation_context ctx;
int ret;
mutex_lock(&mem->lock);
- ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
+ ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
if (unlikely(ret))
goto out;
/* If no VMs were reserved, it means the BO wasn't actually mapped */
@@ -1518,45 +2153,32 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
goto unreserve_out;
}
- ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
+ ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto unreserve_out;
pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
- vm);
-
- list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
- if (entry->bo_va->base.vm == vm && entry->is_mapped) {
- pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
- entry->va,
- entry->va + bo_size,
- entry);
-
- ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
- if (ret == 0) {
- entry->is_mapped = false;
- } else {
- pr_err("failed to unmap VA 0x%llx\n",
- mem->va);
- goto unreserve_out;
- }
+ avm);
- mem->mapped_to_gpu_memory--;
- pr_debug("\t DEC mapping count %d\n",
- mem->mapped_to_gpu_memory);
- }
- }
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != avm || !entry->is_mapped)
+ continue;
- /* If BO is unmapped from all VMs, unfence it. It can be evicted if
- * required.
- */
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
- !mem->bo->tbo.pin_count)
- amdgpu_amdkfd_remove_eviction_fence(mem->bo,
- process_info->eviction_fence);
+ pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
+ entry->va, entry->va + bo_size, entry);
+
+ ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ if (ret)
+ goto unreserve_out;
+
+ entry->is_mapped = false;
+
+ mem->mapped_to_gpu_memory--;
+ pr_debug("\t DEC mapping count %d\n",
+ mem->mapped_to_gpu_memory);
+ }
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
@@ -1566,7 +2188,7 @@ out:
}
int amdgpu_amdkfd_gpuvm_sync_memory(
- struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
+ struct amdgpu_device *adev, struct kgd_mem *mem, bool intr)
{
struct amdgpu_sync sync;
int ret;
@@ -1582,8 +2204,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
}
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
- struct kgd_mem *mem, void **kptr, uint64_t *size)
+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
+ * @bo: Buffer object to be mapped
+ * @bo_gart: Return bo reference
+ *
+ * Before return, bo reference count is incremented. To release the reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart)
+{
+ int ret;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto err_reserve_bo_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto err_pin_bo_failed;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (ret) {
+ pr_err("Failed to bind bo to GART. ret %d\n", ret);
+ goto err_map_bo_gart_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, bo->vm_bo->vm->process_info->eviction_fence);
+
+ amdgpu_bo_unreserve(bo);
+
+ *bo_gart = amdgpu_bo_ref(bo);
+
+ return 0;
+
+err_map_bo_gart_failed:
+ amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+ amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+ return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be mapped for CPU access
+ * @kptr[out]: pointer in kernel CPU address space
+ * @size[out]: size of the buffer
+ *
+ * Pins the BO and maps it for kernel CPU access. The eviction fence is removed
+ * from the BO, since pinned BOs cannot be evicted. The bo must remain on the
+ * validate_list, so the GPU mapping can be restored after a page table was
+ * evicted.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size)
{
int ret;
struct amdgpu_bo *bo = mem->bo;
@@ -1593,9 +2276,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
return -EINVAL;
}
- /* delete kgd_mem from kfd_bo_list to avoid re-validating
- * this BO in BO's restoring after eviction.
- */
mutex_lock(&mem->process_info->lock);
ret = amdgpu_bo_reserve(bo, true);
@@ -1618,7 +2298,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
amdgpu_amdkfd_remove_eviction_fence(
bo, mem->process_info->eviction_fence);
- list_del_init(&mem->validate_list.head);
if (size)
*size = amdgpu_bo_size(bo);
@@ -1638,39 +2317,44 @@ bo_reserve_failed:
return ret;
}
-int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
- struct kfd_vm_fault_info *mem)
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be unmapped for CPU access
+ *
+ * Removes the kernel CPU mapping and unpins the BO. It does not restore the
+ * eviction fence, so this function should only be used for cleanup before the
+ * BO is destroyed.
+ */
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{
- struct amdgpu_device *adev;
+ struct amdgpu_bo *bo = mem->bo;
+
+ (void)amdgpu_bo_reserve(bo, true);
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+}
- adev = (struct amdgpu_device *)kgd;
- if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
+ struct kfd_vm_fault_info *mem)
+{
+ if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
*mem = *adev->gmc.vm_fault_info;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
}
return 0;
}
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
- struct dma_buf *dma_buf,
- uint64_t va, void *vm,
- struct kgd_mem **mem, uint64_t *size,
- uint64_t *mmap_offset)
+static int import_obj_create(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf,
+ struct drm_gem_object *obj,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- struct drm_gem_object *obj;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
struct amdgpu_bo *bo;
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
-
- if (dma_buf->ops != &amdgpu_dmabuf_ops)
- /* Can't handle non-graphics buffers */
- return -EINVAL;
-
- obj = dma_buf->priv;
- if (drm_to_adev(obj->dev) != adev)
- /* Can't handle buffers from other devices */
- return -EINVAL;
+ int ret;
bo = gem_to_amdgpu_bo(obj);
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
@@ -1682,13 +2366,17 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
if (!*mem)
return -ENOMEM;
+ ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
+ if (ret)
+ goto err_free_mem;
+
if (size)
*size = amdgpu_bo_size(bo);
if (mmap_offset)
*mmap_offset = amdgpu_bo_mmap_offset(bo);
- INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ INIT_LIST_HEAD(&(*mem)->attachments);
mutex_init(&(*mem)->lock);
(*mem)->alloc_flags =
@@ -1697,48 +2385,131 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
- drm_gem_object_get(&bo->tbo.base);
+ get_dma_buf(dma_buf);
+ (*mem)->dmabuf = dma_buf;
(*mem)->bo = bo;
(*mem)->va = va;
- (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
- AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
+ !adev->apu_prefer_gtt ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
amdgpu_sync_create(&(*mem)->sync);
(*mem)->is_imported = true;
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_remove_mem;
+
return 0;
+
+err_remove_mem:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&obj->vma_node, drm_priv);
+err_free_mem:
+ kfree(*mem);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct drm_gem_object *obj;
+ uint32_t handle;
+ int ret;
+
+ ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
+ &handle);
+ if (ret)
+ return ret;
+ obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
+ if (!obj) {
+ ret = -EINVAL;
+ goto err_release_handle;
+ }
+
+ ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size,
+ mmap_offset);
+ if (ret)
+ goto err_put_obj;
+
+ (*mem)->gem_handle = handle;
+
+ return 0;
+
+err_put_obj:
+ drm_gem_object_put(obj);
+err_release_handle:
+ drm_gem_handle_delete(adev->kfd.client.file, handle);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+ struct dma_buf **dma_buf)
+{
+ int ret;
+
+ mutex_lock(&mem->lock);
+ ret = kfd_mem_export_dmabuf(mem);
+ if (ret)
+ goto out;
+
+ get_dma_buf(mem->dmabuf);
+ *dma_buf = mem->dmabuf;
+out:
+ mutex_unlock(&mem->lock);
+ return ret;
}
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
* cannot do any memory allocations, and cannot take any locks that
- * are held elsewhere while allocating memory. Therefore this is as
- * simple as possible, using atomic counters.
+ * are held elsewhere while allocating memory.
*
* It doesn't do anything to the BO itself. The real work happens in
* restore, where we get updated page addresses. This function only
* ensures that GPU access to the BO is stopped.
*/
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
- struct mm_struct *mm)
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
{
struct amdkfd_process_info *process_info = mem->process_info;
- int evicted_bos;
int r = 0;
- atomic_inc(&mem->invalid);
- evicted_bos = atomic_inc_return(&process_info->evicted_bos);
- if (evicted_bos == 1) {
+ /* Do not process MMU notifications during CRIU restore until
+ * KFD_CRIU_OP_RESUME IOCTL is received
+ */
+ if (READ_ONCE(process_info->block_mmu_notifications))
+ return 0;
+
+ mutex_lock(&process_info->notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ mem->invalid++;
+ if (++process_info->evicted_bos == 1) {
/* First eviction, stop the queues */
- r = kgd2kfd_quiesce_mm(mm);
- if (r)
+ r = kgd2kfd_quiesce_mm(mni->mm,
+ KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
+
+ if (r && r != -ESRCH)
pr_err("Failed to quiesce KFD\n");
- schedule_delayed_work(&process_info->restore_userptr_work,
- msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ if (r != -ESRCH)
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
+ mutex_unlock(&process_info->notifier_lock);
return r;
}
@@ -1755,137 +2526,169 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
struct ttm_operation_ctx ctx = { false, false };
- int invalid, ret;
+ uint32_t invalid;
+ int ret = 0;
- /* Move all invalidated BOs to the userptr_inval_list and
- * release their user pages by migration to the CPU domain
- */
+ mutex_lock(&process_info->notifier_lock);
+
+ /* Move all invalidated BOs to the userptr_inval_list */
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_valid_list,
- validate_list.head) {
- if (!atomic_read(&mem->invalid))
- continue; /* BO is still valid */
-
- bo = mem->bo;
-
- if (amdgpu_bo_reserve(bo, true))
- return -EAGAIN;
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- amdgpu_bo_unreserve(bo);
- if (ret) {
- pr_err("%s: Failed to invalidate userptr BO\n",
- __func__);
- return -EAGAIN;
- }
-
- list_move_tail(&mem->validate_list.head,
- &process_info->userptr_inval_list);
- }
-
- if (list_empty(&process_info->userptr_inval_list))
- return 0; /* All evicted userptr BOs were freed */
+ validate_list)
+ if (mem->invalid)
+ list_move_tail(&mem->validate_list,
+ &process_info->userptr_inval_list);
/* Go through userptr_inval_list and update any invalid user_pages */
list_for_each_entry(mem, &process_info->userptr_inval_list,
- validate_list.head) {
- invalid = atomic_read(&mem->invalid);
+ validate_list) {
+ invalid = mem->invalid;
if (!invalid)
/* BO hasn't been invalidated since the last
- * revalidation attempt. Keep its BO list.
+ * revalidation attempt. Keep its page list.
*/
continue;
bo = mem->bo;
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
+
+ /* BO reservations and getting user pages (hmm_range_fault)
+ * must happen outside the notifier lock
+ */
+ mutex_unlock(&process_info->notifier_lock);
+
+ /* Move the BO to system (CPU) domain if necessary to unmap
+ * and free the SG table
+ */
+ if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) {
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+ }
+
+ mem->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!mem->range))
+ return -ENOMEM;
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, mem->range);
if (ret) {
- pr_debug("%s: Failed to get user pages: %d\n",
- __func__, ret);
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
+ pr_debug("Failed %d to get user pages\n", ret);
+
+ /* Return -EFAULT bad address error as success. It will
+ * fail later with a VM fault if the GPU tries to access
+ * it. Better than hanging indefinitely with stalled
+ * user mode queues.
+ *
+ * Return other error -EBUSY or -ENOMEM to retry restore
+ */
+ if (ret != -EFAULT)
+ return ret;
- /* Return error -EBUSY or -ENOMEM, retry restore */
- return ret;
+ /* If applications unmap memory before destroying the userptr
+ * from the KFD, trigger a segmentation fault in VM debug mode.
+ */
+ if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+ struct kfd_process *p;
+
+ pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
+ pid_nr(process_info->pid), mem->va);
+
+ // Send GPU VM fault to user space
+ p = kfd_lookup_process_by_pid(process_info->pid);
+ if (p) {
+ kfd_signal_vm_fault_event_with_userptr(p, mem->va);
+ kfd_unref_process(p);
+ }
+ }
+
+ ret = 0;
}
- /*
- * FIXME: Cannot ignore the return code, must hold
- * notifier_lock
- */
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+
+ mutex_lock(&process_info->notifier_lock);
/* Mark the BO as valid unless it was invalidated
* again concurrently.
*/
- if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
- return -EAGAIN;
+ if (mem->invalid != invalid) {
+ ret = -EAGAIN;
+ goto unlock_out;
+ }
+ /* set mem valid if mem has hmm range associated */
+ if (mem->range)
+ mem->invalid = 0;
}
- return 0;
+unlock_out:
+ mutex_unlock(&process_info->notifier_lock);
+
+ return ret;
}
/* Validate invalid userptr BOs
*
- * Validates BOs on the userptr_inval_list, and moves them back to the
- * userptr_valid_list. Also updates GPUVM page tables with new page
- * addresses and waits for the page table updates to complete.
+ * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables
+ * with new page addresses and waits for the page table updates to complete.
*/
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
- struct amdgpu_bo_list_entry *pd_bo_list_entries;
- struct list_head resv_list, duplicates;
- struct ww_acquire_ctx ticket;
+ struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_sync sync;
+ struct drm_exec exec;
struct amdgpu_vm *peer_vm;
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
- struct ttm_operation_ctx ctx = { false, false };
- int i, ret;
-
- pd_bo_list_entries = kcalloc(process_info->n_vms,
- sizeof(struct amdgpu_bo_list_entry),
- GFP_KERNEL);
- if (!pd_bo_list_entries) {
- pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
- ret = -ENOMEM;
- goto out_no_mem;
- }
-
- INIT_LIST_HEAD(&resv_list);
- INIT_LIST_HEAD(&duplicates);
+ int ret;
- /* Get all the page directory BOs that need to be reserved */
- i = 0;
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
- vm_list_node)
- amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
- &pd_bo_list_entries[i++]);
- /* Add the userptr_inval_list entries to resv_list */
- list_for_each_entry(mem, &process_info->userptr_inval_list,
- validate_list.head) {
- list_add_tail(&mem->resv_list.head, &resv_list);
- mem->resv_list.bo = mem->validate_list.bo;
- mem->resv_list.num_shared = mem->validate_list.num_shared;
- }
+ amdgpu_sync_create(&sync);
+ drm_exec_init(&exec, 0, 0);
/* Reserve all BOs and page tables for validation */
- ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
- WARN(!list_empty(&duplicates), "Duplicates should be empty");
- if (ret)
- goto out_free;
+ drm_exec_until_all_locked(&exec) {
+ /* Reserve all the page directories */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
- amdgpu_sync_create(&sync);
+ /* Reserve the userptr_inval_list entries to resv_list */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
- ret = process_validate_vms(process_info);
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
+ }
+
+ ret = process_validate_vms(process_info, NULL);
if (ret)
goto unreserve_out;
/* Validate BOs and update GPUVM page tables */
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_inval_list,
- validate_list.head) {
- struct kfd_bo_va_list *bo_va_entry;
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
bo = mem->bo;
@@ -1899,26 +2702,24 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- list_move_tail(&mem->validate_list.head,
- &process_info->userptr_valid_list);
-
/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
* VM faults if the GPU tries to access the invalid
* memory.
*/
- list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
- if (!bo_va_entry->is_mapped)
+ list_for_each_entry(attachment, &mem->attachments, list) {
+ if (!attachment->is_mapped)
continue;
- ret = update_gpuvm_pte((struct amdgpu_device *)
- bo_va_entry->kgd_dev,
- bo_va_entry, &sync);
+ kfd_mem_dmaunmap_attachment(mem, attachment);
+ ret = update_gpuvm_pte(mem, attachment, &sync);
if (ret) {
pr_err("%s: update PTE failed\n", __func__);
/* make sure this gets validated again */
- atomic_inc(&mem->invalid);
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
goto unreserve_out;
}
}
@@ -1928,12 +2729,51 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ret = process_update_pds(process_info, &sync);
unreserve_out:
- ttm_eu_backoff_reservation(&ticket, &resv_list);
+ drm_exec_fini(&exec);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
-out_free:
- kfree(pd_bo_list_entries);
-out_no_mem:
+
+ return ret;
+}
+
+/* Confirm that all user pages are valid while holding the notifier lock
+ *
+ * Moves valid BOs from the userptr_inval_list back to userptr_val_list.
+ */
+static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ int ret = 0;
+
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list) {
+ bool valid;
+
+ /* keep mem without hmm range at userptr_inval_list */
+ if (!mem->range)
+ continue;
+
+ /* Only check mem with hmm range associated */
+ valid = amdgpu_hmm_range_valid(mem->range);
+ amdgpu_hmm_range_free(mem->range);
+
+ mem->range = NULL;
+ if (!valid) {
+ WARN(!mem->invalid, "Invalid BO not marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+
+ if (mem->invalid) {
+ WARN(1, "Valid BO is marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+
+ list_move_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
+ }
return ret;
}
@@ -1952,9 +2792,11 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
restore_userptr_work);
struct task_struct *usertask;
struct mm_struct *mm;
- int evicted_bos;
+ uint32_t evicted_bos;
- evicted_bos = atomic_read(&process_info->evicted_bos);
+ mutex_lock(&process_info->notifier_lock);
+ evicted_bos = process_info->evicted_bos;
+ mutex_unlock(&process_info->notifier_lock);
if (!evicted_bos)
return;
@@ -1977,9 +2819,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* and we can just restart the queues.
*/
if (!list_empty(&process_info->userptr_inval_list)) {
- if (atomic_read(&process_info->evicted_bos) != evicted_bos)
- goto unlock_out; /* Concurrent eviction, try again */
-
if (validate_invalid_user_pages(process_info))
goto unlock_out;
}
@@ -1988,10 +2827,17 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* be a first eviction that calls quiesce_mm. The eviction
* reference counting inside KFD will handle this case.
*/
- if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
- evicted_bos)
- goto unlock_out;
- evicted_bos = 0;
+ mutex_lock(&process_info->notifier_lock);
+ if (process_info->evicted_bos != evicted_bos)
+ goto unlock_notifier_out;
+
+ if (confirm_valid_user_pages_locked(process_info)) {
+ WARN(1, "User pages unexpectedly invalid");
+ goto unlock_notifier_out;
+ }
+
+ process_info->evicted_bos = evicted_bos = 0;
+
if (kgd2kfd_resume_mm(mm)) {
pr_err("%s: Failed to resume KFD\n", __func__);
/* No recovery from this failure. Probably the CP is
@@ -1999,15 +2845,38 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
*/
}
+unlock_notifier_out:
+ mutex_unlock(&process_info->notifier_lock);
unlock_out:
mutex_unlock(&process_info->lock);
- mmput(mm);
- put_task_struct(usertask);
/* If validation failed, reschedule another attempt */
- if (evicted_bos)
- schedule_delayed_work(&process_info->restore_userptr_work,
+ if (evicted_bos) {
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ kfd_smi_event_queue_restore_rescheduled(mm);
+ }
+ mmput(mm);
+ put_task_struct(usertask);
+}
+
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
+ struct dma_fence *new_ef)
+{
+ struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+ /* protected by process_info->lock */);
+
+ /* If we're replacing an unsignaled eviction fence, that fence will
+ * never be signaled, and if anyone is still waiting on that fence,
+ * they will hang forever. This should never happen. We should only
+ * replace the fence in restore_work that only gets scheduled after
+ * eviction work signaled the fence.
+ */
+ WARN_ONCE(!dma_fence_is_signaled(old_ef),
+ "Replacing unsignaled eviction fence");
+ dma_fence_put(old_ef);
}
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
@@ -2028,74 +2897,61 @@ unlock_out:
* 7. Add fence to all PD and PT BOs.
* 8. Unreserve all BOs
*/
-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
{
- struct amdgpu_bo_list_entry *pd_bo_list;
struct amdkfd_process_info *process_info = info;
struct amdgpu_vm *peer_vm;
struct kgd_mem *mem;
- struct bo_vm_reservation_context ctx;
- struct amdgpu_amdkfd_fence *new_fence;
- int ret = 0, i;
struct list_head duplicate_save;
struct amdgpu_sync sync_obj;
unsigned long failed_size = 0;
unsigned long total_size = 0;
+ struct drm_exec exec;
+ int ret;
INIT_LIST_HEAD(&duplicate_save);
- INIT_LIST_HEAD(&ctx.list);
- INIT_LIST_HEAD(&ctx.duplicates);
-
- pd_bo_list = kcalloc(process_info->n_vms,
- sizeof(struct amdgpu_bo_list_entry),
- GFP_KERNEL);
- if (!pd_bo_list)
- return -ENOMEM;
- i = 0;
mutex_lock(&process_info->lock);
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
- vm_list_node)
- amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
- /* Reserve all BOs and page tables/directory. Add all BOs from
- * kfd_bo_list to ctx.list
- */
- list_for_each_entry(mem, &process_info->kfd_bo_list,
- validate_list.head) {
-
- list_add_tail(&mem->resv_list.head, &ctx.list);
- mem->resv_list.bo = mem->validate_list.bo;
- mem->resv_list.num_shared = mem->validate_list.num_shared;
- }
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret)) {
+ pr_err("Locking VM PD failed, ret: %d\n", ret);
+ goto ttm_reserve_fail;
+ }
+ }
- ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
- false, &duplicate_save);
- if (ret) {
- pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
- goto ttm_reserve_fail;
+ /* Reserve all BOs and page tables/directory. Add all BOs from
+ * kfd_bo_list to ctx.list
+ */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
+
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret)) {
+ pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret);
+ goto ttm_reserve_fail;
+ }
+ }
}
amdgpu_sync_create(&sync_obj);
- /* Validate PDs and PTs */
- ret = process_validate_vms(process_info);
- if (ret)
- goto validate_map_fail;
-
- ret = process_sync_pds_resv(process_info, &sync_obj);
- if (ret) {
- pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
- goto validate_map_fail;
- }
-
- /* Validate BOs and map them to GPUVM (update VM page tables). */
+ /* Validate BOs managed by KFD */
list_for_each_entry(mem, &process_info->kfd_bo_list,
- validate_list.head) {
+ validate_list) {
struct amdgpu_bo *bo = mem->bo;
uint32_t domain = mem->domain;
- struct kfd_bo_va_list *bo_va_entry;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
total_size += amdgpu_bo_size(bo);
@@ -2110,19 +2966,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
}
- ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
- if (ret) {
- pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
- goto validate_map_fail;
- }
- list_for_each_entry(bo_va_entry, &mem->bo_va_list,
- bo_list) {
- ret = update_gpuvm_pte((struct amdgpu_device *)
- bo_va_entry->kgd_dev,
- bo_va_entry,
- &sync_obj);
+ dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL, fence) {
+ ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
if (ret) {
- pr_debug("Memory eviction: update PTE failed. Try again\n");
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
}
}
@@ -2131,6 +2979,59 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
if (failed_size)
pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+ /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
+ * validations above would invalidate DMABuf imports again.
+ */
+ ret = process_validate_vms(process_info, &exec.ticket);
+ if (ret) {
+ pr_debug("Validating VMs failed, ret: %d\n", ret);
+ goto validate_map_fail;
+ }
+
+ /* Update mappings managed by KFD. */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
+ list_for_each_entry(attachment, &mem->attachments, list) {
+ if (!attachment->is_mapped)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, attachment);
+ ret = update_gpuvm_pte(mem, attachment, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: update PTE failed. Try again\n");
+ goto validate_map_fail;
+ }
+ }
+ }
+
+ /* Update mappings not managed by KFD */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_device *adev = amdgpu_ttm_adev(
+ peer_vm->root.bo->tbo.bdev);
+
+ struct amdgpu_fpriv *fpriv =
+ container_of(peer_vm, struct amdgpu_fpriv, vm);
+
+ ret = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle PRT moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+
+ ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+ }
+
/* Update page directories */
ret = process_update_pds(process_info, &sync_obj);
if (ret) {
@@ -2138,45 +3039,72 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
+ /* Sync with fences on all the page tables. They implicitly depend on any
+ * move fences from amdgpu_vm_handle_moved above.
+ */
+ ret = process_sync_pds_resv(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+ goto validate_map_fail;
+ }
+
/* Wait for validate and PT updates to finish */
amdgpu_sync_wait(&sync_obj, false);
- /* Release old eviction fence and create new one, because fence only
- * goes from unsignaled to signaled, fence cannot be reused.
- * Use context and mm from the old fence.
+ /* The old eviction fence may be unsignaled if restore happens
+ * after a GPU reset or suspend/resume. Keep the old fence in that
+ * case. Otherwise release the old eviction fence and create new
+ * one, because fence only goes from unsignaled to signaled once
+ * and cannot be reused. Use context and mm from the old fence.
+ *
+ * If an old eviction fence signals after this check, that's OK.
+ * Anyone signaling an eviction fence must stop the queues first
+ * and schedule another restore worker.
*/
- new_fence = amdgpu_amdkfd_fence_create(
+ if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+ struct amdgpu_amdkfd_fence *new_fence =
+ amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
- process_info->eviction_fence->mm);
- if (!new_fence) {
- pr_err("Failed to create eviction fence\n");
- ret = -ENOMEM;
- goto validate_map_fail;
+ process_info->eviction_fence->mm,
+ NULL);
+
+ if (!new_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto validate_map_fail;
+ }
+ dma_fence_put(&process_info->eviction_fence->base);
+ process_info->eviction_fence = new_fence;
+ replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+ } else {
+ WARN_ONCE(*ef != &process_info->eviction_fence->base,
+ "KFD eviction fence doesn't match KGD process_info");
}
- dma_fence_put(&process_info->eviction_fence->base);
- process_info->eviction_fence = new_fence;
- *ef = dma_fence_get(&new_fence->base);
- /* Attach new eviction fence to all BOs */
- list_for_each_entry(mem, &process_info->kfd_bo_list,
- validate_list.head)
- amdgpu_bo_fence(mem->bo,
- &process_info->eviction_fence->base, true);
+ /* Attach new eviction fence to all BOs except pinned ones */
+ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
+ if (mem->bo->tbo.pin_count)
+ continue;
- /* Attach eviction fence to PD / PT BOs */
+ dma_resv_add_fence(mem->bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ /* Attach eviction fence to PD / PT BOs and DMABuf imports */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
- struct amdgpu_bo *bo = peer_vm->root.base.bo;
+ struct amdgpu_bo *bo = peer_vm->root.bo;
- amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
+ dma_resv_add_fence(bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
}
validate_map_fail:
- ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
+ drm_exec_fini(&exec);
mutex_unlock(&process_info->lock);
- kfree(pd_bo_list);
return ret;
}
@@ -2194,7 +3122,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
return -ENOMEM;
mutex_init(&(*mem)->lock);
- INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ INIT_LIST_HEAD(&(*mem)->attachments);
(*mem)->bo = amdgpu_bo_ref(gws_bo);
(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
(*mem)->process_info = process_info;
@@ -2219,10 +3147,12 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
* Add process eviction fence to bo so they can
* evict each other.
*/
- ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
+ ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
- amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+ dma_resv_add_fence(gws_bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
amdgpu_bo_unreserve(gws_bo);
mutex_unlock(&(*mem)->process_info->lock);
@@ -2271,11 +3201,9 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
}
/* Returns GPU-specific tiling mode information */
-int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
config->gb_addr_config = adev->gfx.config.gb_addr_config;
config->tile_config_ptr = adev->gfx.config.tile_mode_array;
config->num_tile_configs =
@@ -2291,3 +3219,34 @@ int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
return 0;
}
+
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem)
+{
+ struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv);
+ struct kfd_mem_attachment *entry;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->is_mapped && entry->bo_va->base.vm == vm)
+ return true;
+ }
+ return false;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data)
+{
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ seq_printf(m, "System mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.system_mem_used >> 20),
+ (kfd_mem_limit.max_system_mem_limit >> 20));
+ seq_printf(m, "TTM mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.ttm_mem_used >> 20),
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 86add0f4ea4d..763f2b8dcf13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -36,13 +36,6 @@
#include "atombios_encoders.h"
#include "bif/bif_4_1_d.h"
-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev,
- ATOM_GPIO_I2C_ASSIGMENT *gpio,
- u8 index)
-{
-
-}
-
static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio)
{
struct amdgpu_i2c_bus_rec i2c;
@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *
gpio = &i2c_info->asGPIO_Info[0];
for (i = 0; i < num_indices; i++) {
-
- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
-
if (gpio->sucI2cId.ucAccess == id) {
i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
break;
@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
gpio = &i2c_info->asGPIO_Info[0];
for (i = 0; i < num_indices; i++) {
- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
-
i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
if (i2c.valid) {
@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
}
}
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ ATOM_GPIO_I2C_ASSIGMENT *gpio;
+ struct amdgpu_i2c_bus_rec i2c;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+ struct _ATOM_GPIO_I2C_INFO *i2c_info;
+ uint16_t data_offset, size;
+ int i, num_indices;
+ char stmp[32];
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+ gpio = &i2c_info->asGPIO_Info[0];
+ for (i = 0; i < num_indices; i++) {
+ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+
+ if (i2c.valid && i2c.i2c_id == i2c_id) {
+ sprintf(stmp, "OEM 0x%x", i2c.i2c_id);
+ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp);
+ break;
+ }
+ gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+ }
+ }
+}
+
struct amdgpu_gpio_rec
amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
u8 id)
@@ -686,7 +706,6 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
}
adev->clock.dp_extclk =
le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
- adev->clock.current_dispclk = adev->clock.default_dispclk;
adev->clock.max_pixel_clock = le16_to_cpu(firmware_info->info.usMaxPixelClock);
if (adev->clock.max_pixel_clock == 0)
@@ -1018,7 +1037,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_div = args.v3.ucPostDiv;
dividers->enable_post_div = (args.v3.ucCntlFlag &
@@ -1038,7 +1059,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
if (strobe_mode)
args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_div = args.v5.ucPostDiv;
dividers->enable_post_div = (args.v5.ucCntlFlag &
@@ -1056,7 +1079,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
/* fusion */
args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_divider = dividers->post_div = args.v4.ucPostDiv;
dividers->real_clock = le32_to_cpu(args.v4.ulClock);
@@ -1067,7 +1092,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
args.v6_in.ulClock.ulComputeClockFlag = clock_type;
args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv);
dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac);
@@ -1083,6 +1110,7 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
return 0;
}
+#ifdef CONFIG_DRM_AMDGPU_SI
int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
u32 clock,
bool strobe_mode,
@@ -1108,7 +1136,9 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
if (strobe_mode)
args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac);
mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv);
@@ -1134,8 +1164,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
return 0;
}
-void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
- u32 eng_clock, u32 mem_clock)
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock)
{
SET_ENGINE_CLOCK_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings);
@@ -1150,7 +1180,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
if (mem_clock)
args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ return amdgpu_atom_execute_table(adev->mode_info.atom_context, index,
+ (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
@@ -1204,7 +1235,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
args.v2.ucVoltageMode = 0;
args.v2.usVoltageLevel = 0;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
*voltage = le16_to_cpu(args.v2.usVoltageLevel);
break;
@@ -1213,7 +1246,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
*voltage = le16_to_cpu(args.v3.usVoltageLevel);
break;
@@ -1232,157 +1267,6 @@ int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *
return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
}
-int amdgpu_atombios_get_leakage_id_from_vbios(struct amdgpu_device *adev,
- u16 *leakage_id)
-{
- union set_voltage args;
- int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
- u8 frev, crev;
-
- if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
- return -EINVAL;
-
- switch (crev) {
- case 3:
- case 4:
- args.v3.ucVoltageType = 0;
- args.v3.ucVoltageMode = ATOM_GET_LEAKAGE_ID;
- args.v3.usVoltageLevel = 0;
-
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
-
- *leakage_id = le16_to_cpu(args.v3.usVoltageLevel);
- break;
- default:
- DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
- return -EINVAL;
- }
-
- return 0;
-}
-
-int amdgpu_atombios_get_leakage_vddc_based_on_leakage_params(struct amdgpu_device *adev,
- u16 *vddc, u16 *vddci,
- u16 virtual_voltage_id,
- u16 vbios_voltage_id)
-{
- int index = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo);
- u8 frev, crev;
- u16 data_offset, size;
- int i, j;
- ATOM_ASIC_PROFILING_INFO_V2_1 *profile;
- u16 *leakage_bin, *vddc_id_buf, *vddc_buf, *vddci_id_buf, *vddci_buf;
-
- *vddc = 0;
- *vddci = 0;
-
- if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
- &frev, &crev, &data_offset))
- return -EINVAL;
-
- profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
- (adev->mode_info.atom_context->bios + data_offset);
-
- switch (frev) {
- case 1:
- return -EINVAL;
- case 2:
- switch (crev) {
- case 1:
- if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1))
- return -EINVAL;
- leakage_bin = (u16 *)
- (adev->mode_info.atom_context->bios + data_offset +
- le16_to_cpu(profile->usLeakageBinArrayOffset));
- vddc_id_buf = (u16 *)
- (adev->mode_info.atom_context->bios + data_offset +
- le16_to_cpu(profile->usElbVDDC_IdArrayOffset));
- vddc_buf = (u16 *)
- (adev->mode_info.atom_context->bios + data_offset +
- le16_to_cpu(profile->usElbVDDC_LevelArrayOffset));
- vddci_id_buf = (u16 *)
- (adev->mode_info.atom_context->bios + data_offset +
- le16_to_cpu(profile->usElbVDDCI_IdArrayOffset));
- vddci_buf = (u16 *)
- (adev->mode_info.atom_context->bios + data_offset +
- le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset));
-
- if (profile->ucElbVDDC_Num > 0) {
- for (i = 0; i < profile->ucElbVDDC_Num; i++) {
- if (vddc_id_buf[i] == virtual_voltage_id) {
- for (j = 0; j < profile->ucLeakageBinNum; j++) {
- if (vbios_voltage_id <= leakage_bin[j]) {
- *vddc = vddc_buf[j * profile->ucElbVDDC_Num + i];
- break;
- }
- }
- break;
- }
- }
- }
- if (profile->ucElbVDDCI_Num > 0) {
- for (i = 0; i < profile->ucElbVDDCI_Num; i++) {
- if (vddci_id_buf[i] == virtual_voltage_id) {
- for (j = 0; j < profile->ucLeakageBinNum; j++) {
- if (vbios_voltage_id <= leakage_bin[j]) {
- *vddci = vddci_buf[j * profile->ucElbVDDCI_Num + i];
- break;
- }
- }
- break;
- }
- }
- }
- break;
- default:
- DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
- return -EINVAL;
- }
- break;
- default:
- DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
- return -EINVAL;
- }
-
- return 0;
-}
-
-union get_voltage_info {
- struct _GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 in;
- struct _GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 evv_out;
-};
-
-int amdgpu_atombios_get_voltage_evv(struct amdgpu_device *adev,
- u16 virtual_voltage_id,
- u16 *voltage)
-{
- int index = GetIndexIntoMasterTable(COMMAND, GetVoltageInfo);
- u32 entry_id;
- u32 count = adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count;
- union get_voltage_info args;
-
- for (entry_id = 0; entry_id < count; entry_id++) {
- if (adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[entry_id].v ==
- virtual_voltage_id)
- break;
- }
-
- if (entry_id >= count)
- return -EINVAL;
-
- args.in.ucVoltageType = VOLTAGE_TYPE_VDDC;
- args.in.ucVoltageMode = ATOM_GET_VOLTAGE_EVV_VOLTAGE;
- args.in.usVoltageLevel = cpu_to_le16(virtual_voltage_id);
- args.in.ulSCLKFreq =
- cpu_to_le32(adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[entry_id].clk);
-
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
-
- *voltage = le16_to_cpu(args.evv_out.usVoltageLevel);
-
- return 0;
-}
-
union voltage_object_info {
struct _ATOM_VOLTAGE_OBJECT_INFO v1;
struct _ATOM_VOLTAGE_OBJECT_INFO_V2 v2;
@@ -1626,6 +1510,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
(u32)le32_to_cpu(*((u32 *)reg_data + j));
j++;
} else if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_EQU_PREV) {
+ if (i == 0)
+ continue;
reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
}
@@ -1654,6 +1540,7 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
}
return -EINVAL;
}
+#endif
bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
{
@@ -1720,6 +1607,18 @@ void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
WREG32(adev->bios_scratch_reg_offset + 3, tmp);
}
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+ u32 backlight_level)
+{
+ u32 tmp = RREG32(adev->bios_scratch_reg_offset + 2);
+
+ tmp &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK;
+ tmp |= (backlight_level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT) &
+ ATOM_S2_CURRENT_BL_LEVEL_MASK;
+
+ WREG32(adev->bios_scratch_reg_offset + 2, tmp);
+}
+
bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev)
{
u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7);
@@ -1905,53 +1804,64 @@ static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
return r;
}
-/**
- * cail_ioreg_write - write IO register
- *
- * @info: atom card_info pointer
- * @reg: IO register offset
- * @val: value to write to the pll register
- *
- * Provides a IO register accessor for the atom interpreter (r4xx+).
- */
-static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val)
+static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
- struct amdgpu_device *adev = drm_to_adev(info->dev);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
- WREG32_IO(reg, val);
+ return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
}
-/**
- * cail_ioreg_read - read IO register
- *
- * @info: atom card_info pointer
- * @reg: IO register offset
- *
- * Provides an IO register accessor for the atom interpreter (r4xx+).
- * Returns the value of the IO register.
- */
-static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
+static ssize_t amdgpu_atombios_get_vbios_build(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
- struct amdgpu_device *adev = drm_to_adev(info->dev);
- uint32_t r;
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
- r = RREG32_IO(reg);
- return r;
+ return sysfs_emit(buf, "%s\n", ctx->build_num);
}
-static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
+ NULL);
+static DEVICE_ATTR(vbios_build, 0444, amdgpu_atombios_get_vbios_build, NULL);
+
+static struct attribute *amdgpu_vbios_version_attrs[] = {
+ &dev_attr_vbios_version.attr, &dev_attr_vbios_build.attr, NULL
+};
+
+static umode_t amdgpu_vbios_version_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr,
+ int index)
{
+ struct device *dev = kobj_to_dev(kobj);
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct atom_context *ctx = adev->mode_info.atom_context;
- return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version);
+ if (attr == &dev_attr_vbios_build.attr && !strlen(ctx->build_num))
+ return 0;
+
+ return attr->mode;
}
-static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
- NULL);
+const struct attribute_group amdgpu_vbios_version_attr_group = {
+ .attrs = amdgpu_vbios_version_attrs,
+ .is_visible = amdgpu_vbios_version_attrs_is_visible,
+};
+
+int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.atom_context)
+ return devm_device_add_group(adev->dev,
+ &amdgpu_vbios_version_attr_group);
+
+ return 0;
+}
/**
* amdgpu_atombios_fini - free the driver info and callbacks for atombios
@@ -1972,7 +1882,6 @@ void amdgpu_atombios_fini(struct amdgpu_device *adev)
adev->mode_info.atom_context = NULL;
kfree(adev->mode_info.atom_card_info);
adev->mode_info.atom_card_info = NULL;
- device_remove_file(adev->dev, &dev_attr_vbios_version);
}
/**
@@ -1989,7 +1898,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
{
struct card_info *atom_card_info =
kzalloc(sizeof(struct card_info), GFP_KERNEL);
- int ret;
if (!atom_card_info)
return -ENOMEM;
@@ -1998,15 +1906,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
atom_card_info->dev = adev_to_drm(adev);
atom_card_info->reg_read = cail_reg_read;
atom_card_info->reg_write = cail_reg_write;
- /* needed for iio ops */
- if (adev->rio_mem) {
- atom_card_info->ioreg_read = cail_ioreg_read;
- atom_card_info->ioreg_write = cail_ioreg_write;
- } else {
- DRM_DEBUG("PCI I/O BAR is not found. Using MMIO to access ATOM BIOS\n");
- atom_card_info->ioreg_read = cail_reg_read;
- atom_card_info->ioreg_write = cail_reg_write;
- }
atom_card_info->mc_read = cail_mc_read;
atom_card_info->mc_write = cail_mc_write;
atom_card_info->pll_read = cail_pll_read;
@@ -2022,17 +1921,14 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
if (adev->is_atom_fw) {
amdgpu_atomfirmware_scratch_regs_init(adev);
amdgpu_atomfirmware_allocate_fb_scratch(adev);
+ /* cached firmware_flags for further usage */
+ adev->mode_info.firmware_flags =
+ amdgpu_atomfirmware_query_firmware_capability(adev);
} else {
amdgpu_atombios_scratch_regs_init(adev);
amdgpu_atombios_allocate_fb_scratch(adev);
}
- ret = device_create_file(adev->dev, &dev_attr_vbios_version);
- if (ret) {
- DRM_ERROR("Failed to create device file for VBIOS version\n");
- return ret;
- }
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 1321ec09c734..867bc5c5ce67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -89,8 +89,7 @@ struct atom_memory_info {
#define MAX_AC_TIMING_ENTRIES 16
-struct atom_memory_clock_range_table
-{
+struct atom_memory_clock_range_table {
u8 num_entries;
u8 rsv[3];
u32 mclk[MAX_AC_TIMING_ENTRIES];
@@ -118,14 +117,12 @@ struct atom_mc_reg_table {
#define MAX_VOLTAGE_ENTRIES 32
-struct atom_voltage_table_entry
-{
+struct atom_voltage_table_entry {
u16 value;
u32 smio_low;
};
-struct atom_voltage_table
-{
+struct atom_voltage_table {
u32 count;
u32 mask_low;
u32 phase_delay;
@@ -139,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev,
uint8_t id);
void amdgpu_atombios_i2c_init(struct amdgpu_device *adev);
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id);
bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev);
@@ -160,25 +158,14 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
bool strobe_mode,
struct atom_clock_dividers *dividers);
+#ifdef CONFIG_DRM_AMDGPU_SI
int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
u32 clock,
bool strobe_mode,
struct atom_mpll_param *mpll_param);
-void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
- u32 eng_clock, u32 mem_clock);
-
-int amdgpu_atombios_get_leakage_id_from_vbios(struct amdgpu_device *adev,
- u16 *leakage_id);
-
-int amdgpu_atombios_get_leakage_vddc_based_on_leakage_params(struct amdgpu_device *adev,
- u16 *vddc, u16 *vddci,
- u16 virtual_voltage_id,
- u16 vbios_voltage_id);
-
-int amdgpu_atombios_get_voltage_evv(struct amdgpu_device *adev,
- u16 virtual_voltage_id,
- u16 *voltage);
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock);
bool
amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
@@ -191,30 +178,33 @@ int amdgpu_atombios_get_voltage_table(struct amdgpu_device *adev,
int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
u8 module_index,
struct atom_mc_reg_table *reg_table);
+int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
+ u16 voltage_id, u16 *voltage);
+int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
+ u16 *voltage,
+ u16 leakage_idx);
+void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
+ u16 *vddc, u16 *vddci, u16 *mvdd);
+int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
+ u8 voltage_type,
+ u8 *svd_gpio_id, u8 *svc_gpio_id);
+#endif
bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
bool hung);
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+ u32 backlight_level);
bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
-int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
- u16 voltage_id, u16 *voltage);
-int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
- u16 *voltage,
- u16 leakage_idx);
-void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
- u16 *vddc, u16 *vddci, u16 *mvdd);
int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
u8 clock_type,
u32 clock,
bool strobe_mode,
struct atom_clock_dividers *dividers);
-int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
- u8 voltage_type,
- u8 *svd_gpio_id, u8 *svc_gpio_id);
int amdgpu_atombios_get_data_table(struct amdgpu_device *adev,
uint32_t table,
@@ -225,5 +215,6 @@ int amdgpu_atombios_get_data_table(struct amdgpu_device *adev,
void amdgpu_atombios_fini(struct amdgpu_device *adev);
int amdgpu_atombios_init(struct amdgpu_device *adev);
+int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 6107ac91db25..636385c80f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -29,23 +29,60 @@
#include "atombios.h"
#include "soc15_hw_ip.h"
-bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
+union firmware_info {
+ struct atom_firmware_info_v3_1 v31;
+ struct atom_firmware_info_v3_2 v32;
+ struct atom_firmware_info_v3_3 v33;
+ struct atom_firmware_info_v3_4 v34;
+ struct atom_firmware_info_v3_5 v35;
+};
+
+/*
+ * Helper function to query firmware capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return firmware_capability in firmwareinfo table on success or 0 if not
+ */
+uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *adev)
{
- int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- firmwareinfo);
- uint16_t data_offset;
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union firmware_info *firmware_info;
+ u8 frev, crev;
+ u32 fw_cap = 0;
- if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
- NULL, NULL, &data_offset)) {
- struct atom_firmware_info_v3_1 *firmware_info =
- (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
- data_offset);
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
- if (le32_to_cpu(firmware_info->firmware_capability) &
- ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION)
- return true;
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
+ index, &size, &frev, &crev, &data_offset)) {
+ /* support firmware_info 3.1 + */
+ if ((frev == 3 && crev >= 1) || (frev > 3)) {
+ firmware_info = (union firmware_info *)
+ (mode_info->atom_context->bios + data_offset);
+ fw_cap = le32_to_cpu(firmware_info->v31.firmware_capability);
+ }
}
- return false;
+
+ return fw_cap;
+}
+
+/*
+ * Helper function to query gpu virtualizaiton capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if gpu virtualization is supported or false if not
+ */
+bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION) ? true : false;
}
void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
@@ -65,39 +102,104 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
}
}
+static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
+{
+ u32 start_addr, fw_size, drv_size;
+
+ start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+ drv_size = le16_to_cpu(fw_usage->used_by_driver_in_kb);
+
+ DRM_DEBUG("atom firmware v2_1 requested %08x %dkb fw %dkb drv\n",
+ start_addr,
+ fw_size,
+ drv_size);
+
+ if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+ (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+ /* Firmware request VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ /* Use the default scratch size */
+ *usage_bytes = 0;
+ } else {
+ *usage_bytes = drv_size << 10;
+ }
+ return 0;
+}
+
+static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
+{
+ u32 fw_start_addr, fw_size, drv_start_addr, drv_size;
+
+ fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+
+ drv_start_addr = le32_to_cpu(fw_usage->driver_region0_start_address_in_kb);
+ drv_size = le32_to_cpu(fw_usage->used_by_driver_region0_in_kb);
+
+ DRM_DEBUG("atom requested fw start at %08x %dkb and drv start at %08x %dkb\n",
+ fw_start_addr,
+ fw_size,
+ drv_start_addr,
+ drv_size);
+
+ if (amdgpu_sriov_vf(adev) &&
+ ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+ /* Firmware request VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ }
+
+ if (amdgpu_sriov_vf(adev) &&
+ ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+ /* driver request VRAM reservation for SR-IOV */
+ adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.drv_vram_usage_size = drv_size << 10;
+ }
+
+ *usage_bytes = 0;
+ return 0;
+}
+
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
{
struct atom_context *ctx = adev->mode_info.atom_context;
int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
vram_usagebyfirmware);
- struct vram_usagebyfirmware_v2_1 *firmware_usage;
- uint32_t start_addr, size;
- uint16_t data_offset;
+ struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
+ struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
+ u16 data_offset;
+ u8 frev, crev;
int usage_bytes = 0;
- if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
- firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
- DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n",
- le32_to_cpu(firmware_usage->start_address_in_kb),
- le16_to_cpu(firmware_usage->used_by_firmware_in_kb),
- le16_to_cpu(firmware_usage->used_by_driver_in_kb));
-
- start_addr = le32_to_cpu(firmware_usage->start_address_in_kb);
- size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb);
-
- if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
- (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
- ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
- /* Firmware request VRAM reservation for SR-IOV */
- adev->mman.fw_vram_usage_start_offset = (start_addr &
- (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
- adev->mman.fw_vram_usage_size = size << 10;
- /* Use the default scratch size */
- usage_bytes = 0;
- } else {
- usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10;
+ /* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
+ if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+ if (frev == 2 && crev == 1) {
+ fw_usage_v2_1 =
+ (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+ fw_usage_v2_1,
+ &usage_bytes);
+ } else if (frev >= 2 && crev >= 2) {
+ fw_usage_v2_2 =
+ (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+ fw_usage_v2_2,
+ &usage_bytes);
+ }
}
}
+
ctx->scratch_size_bytes = 0;
if (usage_bytes == 0)
usage_bytes = 20 * 1024;
@@ -113,22 +215,29 @@ union igp_info {
struct atom_integrated_system_info_v1_11 v11;
struct atom_integrated_system_info_v1_12 v12;
struct atom_integrated_system_info_v2_1 v21;
+ struct atom_integrated_system_info_v2_3 v23;
};
union umc_info {
struct atom_umc_info_v3_1 v31;
+ struct atom_umc_info_v3_2 v32;
+ struct atom_umc_info_v3_3 v33;
+ struct atom_umc_info_v4_0 v40;
};
union vram_info {
struct atom_vram_info_header_v2_3 v23;
struct atom_vram_info_header_v2_4 v24;
struct atom_vram_info_header_v2_5 v25;
+ struct atom_vram_info_header_v2_6 v26;
+ struct atom_vram_info_header_v3_0 v30;
};
union vram_module {
struct atom_vram_module_v9 v9;
struct atom_vram_module_v10 v10;
struct atom_vram_module_v11 v11;
+ struct atom_vram_module_v3_0 v30;
};
static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
@@ -147,13 +256,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
vram_type = AMDGPU_VRAM_TYPE_DDR3;
break;
case Ddr4MemType:
- case LpDdr4MemType:
vram_type = AMDGPU_VRAM_TYPE_DDR4;
break;
+ case LpDdr4MemType:
+ vram_type = AMDGPU_VRAM_TYPE_LPDDR4;
+ break;
case Ddr5MemType:
- case LpDdr5MemType:
vram_type = AMDGPU_VRAM_TYPE_DDR5;
break;
+ case LpDdr5MemType:
+ vram_type = AMDGPU_VRAM_TYPE_LPDDR5;
+ break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;
@@ -164,11 +277,16 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
vram_type = AMDGPU_VRAM_TYPE_GDDR5;
break;
case ATOM_DGPU_VRAM_TYPE_HBM2:
+ case ATOM_DGPU_VRAM_TYPE_HBM2E:
+ case ATOM_DGPU_VRAM_TYPE_HBM3:
vram_type = AMDGPU_VRAM_TYPE_HBM;
break;
case ATOM_DGPU_VRAM_TYPE_GDDR6:
vram_type = AMDGPU_VRAM_TYPE_GDDR6;
break;
+ case ATOM_DGPU_VRAM_TYPE_HBM3E:
+ vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+ break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;
@@ -178,7 +296,6 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
return vram_type;
}
-
int
amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type,
@@ -189,6 +306,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
u16 data_offset, size;
union igp_info *igp_info;
union vram_info *vram_info;
+ union umc_info *umc_info;
union vram_module *vram_module;
u8 frev, crev;
u8 mem_type;
@@ -200,10 +318,16 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
if (adev->flags & AMD_IS_APU)
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
integratedsysteminfo);
- else
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- vram_info);
-
+ else {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info);
+ break;
+ default:
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info);
+ }
+ }
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
@@ -218,10 +342,13 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = igp_info->v11.umachannelnumber;
if (!mem_channel_number)
mem_channel_number = 1;
- /* channel width is 64 */
- if (vram_width)
- *vram_width = mem_channel_number * 64;
mem_type = igp_info->v11.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
break;
@@ -236,10 +363,27 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = igp_info->v21.umachannelnumber;
if (!mem_channel_number)
mem_channel_number = 1;
- /* channel width is 64 */
- if (vram_width)
- *vram_width = mem_channel_number * 64;
mem_type = igp_info->v21.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ case 3:
+ mem_channel_number = igp_info->v23.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v23.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
break;
@@ -251,75 +395,150 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
return -EINVAL;
}
} else {
- vram_info = (union vram_info *)
- (mode_info->atom_context->bios + data_offset);
- module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
- switch (crev) {
- case 3:
- if (module_id > vram_info->v23.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v23.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
- }
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- case 4:
- if (module_id > vram_info->v24.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v24.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v10.vram_module_size);
- i++;
- }
- mem_type = vram_module->v10.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v10.channel_num;
- mem_channel_width = vram_module->v10.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- case 5:
- if (module_id > vram_info->v25.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v25.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v11.vram_module_size);
- i++;
- }
- mem_type = vram_module->v11.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v11.channel_num;
- mem_channel_width = vram_module->v11.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
+
+ if (frev == 4) {
+ switch (crev) {
+ case 0:
+ mem_channel_number = le32_to_cpu(umc_info->v40.channel_num);
+ mem_type = le32_to_cpu(umc_info->v40.vram_type);
+ mem_channel_width = le32_to_cpu(umc_info->v40.channel_width);
+ mem_vendor = RREG32(adev->bios_scratch_reg_offset + 4) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else
+ return -EINVAL;
break;
default:
- return -EINVAL;
+ vram_info = (union vram_info *)
+ (mode_info->atom_context->bios + data_offset);
+
+ module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
+ if (frev == 3) {
+ switch (crev) {
+ /* v30 */
+ case 0:
+ vram_module = (union vram_module *)vram_info->v30.vram_module;
+ mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ mem_type = vram_info->v30.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_info->v30.channel_num;
+ mem_channel_width = vram_info->v30.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * 16;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 2) {
+ switch (crev) {
+ /* v23 */
+ case 3:
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v24 */
+ case 4:
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v25 */
+ case 5:
+ if (module_id > vram_info->v25.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v25.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v11.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v11.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v11.channel_num;
+ mem_channel_width = vram_module->v11.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v26 */
+ case 6:
+ if (module_id > vram_info->v26.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v26.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ /* invalid frev */
+ return -EINVAL;
+ }
}
}
-
}
return 0;
@@ -336,65 +555,163 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
u16 data_offset, size;
union umc_info *umc_info;
u8 frev, crev;
- bool ecc_default_enabled = false;
+ bool mem_ecc_enabled = false;
+ u8 umc_config;
+ u32 umc_config1;
+ adev->ras_default_ecc_enabled = false;
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
umc_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size, &frev, &crev, &data_offset)) {
- /* support umc_info 3.1+ */
- if ((frev == 3 && crev >= 1) || (frev > 3)) {
- umc_info = (union umc_info *)
- (mode_info->atom_context->bios + data_offset);
- ecc_default_enabled =
- (le32_to_cpu(umc_info->v31.umc_config) &
- UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
+ if (frev == 3) {
+ switch (crev) {
+ case 1:
+ umc_config = le32_to_cpu(umc_info->v31.umc_config);
+ mem_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ case 2:
+ umc_config = le32_to_cpu(umc_info->v32.umc_config);
+ mem_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ case 3:
+ umc_config = le32_to_cpu(umc_info->v33.umc_config);
+ umc_config1 = le32_to_cpu(umc_info->v33.umc_config1);
+ mem_ecc_enabled =
+ ((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ||
+ (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ default:
+ /* unsupported crev */
+ return false;
+ }
+ } else if (frev == 4) {
+ switch (crev) {
+ case 0:
+ umc_config = le32_to_cpu(umc_info->v40.umc_config);
+ umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
+ mem_ecc_enabled =
+ (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ default:
+ /* unsupported crev */
+ return false;
+ }
+ } else {
+ /* unsupported frev */
+ return false;
}
}
- return ecc_default_enabled;
+ return mem_ecc_enabled;
}
-union firmware_info {
- struct atom_firmware_info_v3_1 v31;
- struct atom_firmware_info_v3_2 v32;
- struct atom_firmware_info_v3_3 v33;
- struct atom_firmware_info_v3_4 v34;
-};
-
/*
+ * Helper function to query sram ecc capablity
+ *
+ * @adev: amdgpu_device pointer
+ *
* Return true if vbios supports sram ecc or false if not
*/
bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev)
{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_SRAM_ECC) ? true : false;
+}
+
+/*
+ * Helper function to query dynamic boot config capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if vbios supports dynamic boot config or false if not
+ */
+bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false;
+}
+
+/**
+ * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS
+ * @adev: amdgpu_device pointer
+ * @i2c_address: pointer to u8; if not NULL, will contain
+ * the RAS EEPROM address if the function returns true
+ *
+ * Return true if VBIOS supports RAS EEPROM address reporting,
+ * else return false. If true and @i2c_address is not NULL,
+ * will contain the RAS ROM address.
+ */
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
+ u8 *i2c_address)
+{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
int index;
u16 data_offset, size;
union firmware_info *firmware_info;
u8 frev, crev;
- bool sram_ecc_supported = false;
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- firmwareinfo);
+ firmwareinfo);
if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
- index, &size, &frev, &crev, &data_offset)) {
- /* support firmware_info 3.1 + */
- if ((frev == 3 && crev >=1) || (frev > 3)) {
+ index, &size, &frev, &crev,
+ &data_offset)) {
+ /* support firmware_info 3.4 + */
+ if ((frev == 3 && crev >= 4) || (frev > 3)) {
firmware_info = (union firmware_info *)
(mode_info->atom_context->bios + data_offset);
- sram_ecc_supported =
- (le32_to_cpu(firmware_info->v31.firmware_capability) &
- ATOM_FIRMWARE_CAP_SRAM_ECC) ? true : false;
+ /* The ras_rom_i2c_slave_addr should ideally
+ * be a 19-bit EEPROM address, which would be
+ * used as is by the driver; see top of
+ * amdgpu_eeprom.c.
+ *
+ * When this is the case, 0 is of course a
+ * valid RAS EEPROM address, in which case,
+ * we'll drop the first "if (firm...)" and only
+ * leave the check for the pointer.
+ *
+ * The reason this works right now is because
+ * ras_rom_i2c_slave_addr contains the EEPROM
+ * device type qualifier 1010b in the top 4
+ * bits.
+ */
+ if (firmware_info->v34.ras_rom_i2c_slave_addr) {
+ if (i2c_address)
+ *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr;
+ return true;
+ }
}
}
- return sram_ecc_supported;
+ return false;
}
+
union smu_info {
struct atom_smu_info_v3_1 v31;
+ struct atom_smu_info_v4_0 v40;
+};
+
+union gfx_info {
+ struct atom_gfx_info_v2_2 v22;
+ struct atom_gfx_info_v2_4 v24;
+ struct atom_gfx_info_v2_7 v27;
+ struct atom_gfx_info_v3_0 v30;
};
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
@@ -422,10 +739,6 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
adev->pm.current_sclk = adev->clock.default_sclk;
adev->pm.current_mclk = adev->clock.default_mclk;
- /* not technically a clock, but... */
- adev->mode_info.firmware_flags =
- le32_to_cpu(firmware_info->v31.firmware_capability);
-
ret = 0;
}
@@ -438,7 +751,10 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
data_offset);
/* system clock */
- spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
+ if (frev == 3)
+ spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
+ else if (frev == 4)
+ spll->reference_freq = le32_to_cpu(smu_info->v40.core_refclk_10khz);
spll->reference_div = 0;
spll->min_post_div = 1;
@@ -475,13 +791,33 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
ret = 0;
}
+ /* if asic is Navi+, the rlc reference clock is used for system clock
+ * from vbios gfx_info table */
+ if (adev->asic_type >= CHIP_NAVI10) {
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ gfx_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+ if ((frev == 3) ||
+ (frev == 2 && crev == 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v30.golden_tsc_count_lower_refclk);
+ ret = 0;
+ } else if ((frev == 2) &&
+ (crev >= 2) &&
+ (crev != 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v22.rlc_gpu_timer_refclk);
+ ret = 0;
+ } else {
+ BUG();
+ }
+ }
+ }
+
return ret;
}
-union gfx_info {
- struct atom_gfx_info_v2_4 v24;
-};
-
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
@@ -495,26 +831,58 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
&frev, &crev, &data_offset)) {
union gfx_info *gfx_info = (union gfx_info *)
(mode_info->atom_context->bios + data_offset);
- switch (crev) {
- case 4:
- adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines;
- adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh;
- adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se;
- adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se;
- adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches;
- adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
- adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
- adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
- adev->gfx.config.gs_prim_buffer_depth =
- le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf =
- gfx_info->v24.gc_double_offchip_lds_buffer;
- adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
- adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
- return 0;
- default:
+ if (frev == 2) {
+ switch (crev) {
+ case 4:
+ adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth =
+ le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ gfx_info->v24.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+ return 0;
+ case 7:
+ adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth = le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 3) {
+ switch (crev) {
+ case 0:
+ adev->gfx.config.max_shader_engines = gfx_info->v30.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v30.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v30.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v30.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v30.max_texture_channel_caches;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else {
return -EINVAL;
}
@@ -523,67 +891,19 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
}
/*
- * Check if VBIOS supports GDDR6 training data save/restore
+ * Helper function to query two stage mem training capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if two stage mem training is supported or false if not
*/
-static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev)
+bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev)
{
- uint16_t data_offset;
- int index;
+ u32 fw_cap;
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- firmwareinfo);
- if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
- NULL, NULL, &data_offset)) {
- struct atom_firmware_info_v3_1 *firmware_info =
- (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
- data_offset);
-
- DRM_DEBUG("atom firmware capability:0x%08x.\n",
- le32_to_cpu(firmware_info->firmware_capability));
-
- if (le32_to_cpu(firmware_info->firmware_capability) &
- ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING)
- return true;
- }
-
- return false;
-}
-
-int amdgpu_mem_train_support(struct amdgpu_device *adev)
-{
- int ret;
- uint32_t major, minor, revision, hw_v;
-
- if (gddr6_mem_train_vbios_support(adev)) {
- amdgpu_discovery_get_ip_version(adev, MP0_HWID, &major, &minor, &revision);
- hw_v = HW_REV(major, minor, revision);
- /*
- * treat 0 revision as a special case since register for MP0 and MMHUB is missing
- * for some Navi10 A0, preventing driver from discovering the hwip information since
- * none of the functions will be initialized, it should not cause any problems
- */
- switch (hw_v) {
- case HW_REV(11, 0, 0):
- case HW_REV(11, 0, 5):
- case HW_REV(11, 0, 7):
- case HW_REV(11, 0, 11):
- case HW_REV(11, 0, 12):
- ret = 1;
- break;
- default:
- DRM_ERROR("memory training vbios supports but psp hw(%08x)"
- " doesn't support!\n", hw_v);
- ret = -1;
- break;
- }
- } else {
- ret = 0;
- hw_v = -1;
- }
+ fw_cap = adev->mode_info.firmware_flags;
-
- DRM_DEBUG("mp0 hw_v %08x, ret:%d.\n", hw_v, ret);
- return ret;
+ return (fw_cap & ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING) ? true : false;
}
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
@@ -605,7 +925,7 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
firmware_info = (union firmware_info *)(ctx->bios + data_offset);
- if (frev !=3)
+ if (frev != 3)
return -EINVAL;
switch (crev) {
@@ -613,6 +933,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
fw_reserved_fb_size =
(firmware_info->v34.fw_reserved_size_in_kb << 10);
break;
+ case 5:
+ fw_reserved_fb_size =
+ (firmware_info->v35.fw_reserved_size_in_kb << 10);
+ break;
default:
fw_reserved_fb_size = 0;
break;
@@ -620,3 +944,68 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
return fw_reserved_fb_size;
}
+
+/*
+ * Helper function to execute asic_init table
+ *
+ * @adev: amdgpu_device pointer
+ * @fb_reset: flag to indicate whether fb is reset or not
+ *
+ * Return 0 if succeed, otherwise failed
+ */
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ uint32_t bootup_sclk_in10khz, bootup_mclk_in10khz;
+ struct asic_init_ps_allocation_v2_1 asic_init_ps_v2_1;
+ int index;
+
+ if (!mode_info)
+ return -EINVAL;
+
+ ctx = mode_info->atom_context;
+ if (!ctx)
+ return -EINVAL;
+
+ /* query bootup sclk/mclk from firmware_info table */
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union firmware_info *firmware_info =
+ (union firmware_info *)(ctx->bios +
+ data_offset);
+
+ bootup_sclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz);
+ bootup_mclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz);
+ } else {
+ return -EINVAL;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1,
+ asic_init);
+ if (amdgpu_atom_parse_cmd_header(mode_info->atom_context, index, &frev, &crev)) {
+ if (frev == 2 && crev >= 1) {
+ memset(&asic_init_ps_v2_1, 0, sizeof(asic_init_ps_v2_1));
+ asic_init_ps_v2_1.param.engineparam.sclkfreqin10khz = bootup_sclk_in10khz;
+ asic_init_ps_v2_1.param.memparam.mclkfreqin10khz = bootup_mclk_in10khz;
+ asic_init_ps_v2_1.param.engineparam.engineflag = b3NORMAL_ENGINE_INIT;
+ if (!fb_reset)
+ asic_init_ps_v2_1.param.memparam.memflag = b3DRAM_SELF_REFRESH_EXIT;
+ else
+ asic_init_ps_v2_1.param.memparam.memflag = 0;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1,
+ sizeof(asic_init_ps_v2_1));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 9f0d4356e8df..649b5530d8ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -26,7 +26,8 @@
#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t))
-bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev);
+uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev);
void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
@@ -35,7 +36,10 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t *i2c_address);
+bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
-int amdgpu_mem_train_support(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 7abe9500c0c6..3893e6fc2f03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
+#include "amdgpu.h"
#include "amd_acpi.h"
#define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0)
@@ -73,28 +74,21 @@ struct atpx_mux {
u16 mux;
} __packed;
-bool amdgpu_has_atpx(void) {
+bool amdgpu_has_atpx(void)
+{
return amdgpu_atpx_priv.atpx_detected;
}
-bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+bool amdgpu_has_atpx_dgpu_power_cntl(void)
+{
return amdgpu_atpx_priv.atpx.functions.power_cntl;
}
-bool amdgpu_is_atpx_hybrid(void) {
+bool amdgpu_is_atpx_hybrid(void)
+{
return amdgpu_atpx_priv.atpx.is_hybrid;
}
-bool amdgpu_atpx_dgpu_req_power_for_displays(void) {
- return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
-}
-
-#if defined(CONFIG_ACPI)
-void *amdgpu_atpx_get_dhandle(void) {
- return amdgpu_atpx_priv.dhandle;
-}
-#endif
-
/**
* amdgpu_atpx_call - call an ATPX method
*
@@ -133,7 +127,7 @@ static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function,
/* Fail only if calling the method fails and ATPX is supported */
if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
- printk("failed to evaluate ATPX got %s\n",
+ pr_err("failed to evaluate ATPX got %s\n",
acpi_format_exception(status));
kfree(buffer.pointer);
return NULL;
@@ -165,7 +159,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
}
/**
- * amdgpu_atpx_validate_functions - validate ATPX functions
+ * amdgpu_atpx_validate - validate ATPX functions
*
* @atpx: amdgpu atpx struct
*
@@ -189,7 +183,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
size = *(u16 *) info->buffer.pointer;
if (size < 10) {
- printk("ATPX buffer is too small: %zu\n", size);
+ pr_err("ATPX buffer is too small: %zu\n", size);
kfree(info);
return -EINVAL;
}
@@ -222,11 +216,11 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
atpx->is_hybrid = false;
if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) {
- printk("ATPX Hybrid Graphics, forcing to ATPX\n");
+ pr_warn("ATPX Hybrid Graphics, forcing to ATPX\n");
atpx->functions.power_cntl = true;
atpx->is_hybrid = false;
} else {
- printk("ATPX Hybrid Graphics\n");
+ pr_notice("ATPX Hybrid Graphics\n");
/*
* Disable legacy PM methods only when pcie port PM is usable,
* otherwise the device might fail to power off or power on.
@@ -268,7 +262,7 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
size = *(u16 *) info->buffer.pointer;
if (size < 8) {
- printk("ATPX buffer is too small: %zu\n", size);
+ pr_err("ATPX buffer is too small: %zu\n", size);
err = -EINVAL;
goto out;
}
@@ -277,8 +271,8 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
memcpy(&output, info->buffer.pointer, size);
/* TODO: check version? */
- printk("ATPX version %u, functions 0x%08x\n",
- output.version, output.function_bits);
+ pr_notice("ATPX version %u, functions 0x%08x\n",
+ output.version, output.function_bits);
amdgpu_atpx_parse_functions(&atpx->functions, output.function_bits);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index d9b35df33806..199693369c7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -29,18 +29,17 @@
#define AMDGPU_BENCHMARK_COMMON_MODES_N 17
static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
- uint64_t saddr, uint64_t daddr, int n)
+ uint64_t saddr, uint64_t daddr, int n, s64 *time_ms)
{
- unsigned long start_jiffies;
- unsigned long end_jiffies;
+ ktime_t stime, etime;
struct dma_fence *fence;
int i, r;
- start_jiffies = jiffies;
+ stime = ktime_get();
for (i = 0; i < n; i++) {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
- false, false, false);
+ false, false, 0);
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
@@ -48,118 +47,81 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
if (r)
goto exit_do_move;
}
- end_jiffies = jiffies;
- r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
+ etime = ktime_get();
+ *time_ms = ktime_ms_delta(etime, stime);
+
return r;
}
-static void amdgpu_benchmark_log_results(int n, unsigned size,
- unsigned int time,
+static void amdgpu_benchmark_log_results(struct amdgpu_device *adev,
+ int n, unsigned size,
+ s64 time_ms,
unsigned sdomain, unsigned ddomain,
char *kind)
{
- unsigned int throughput = (n * (size >> 10)) / time;
- DRM_INFO("amdgpu: %s %u bo moves of %u kB from"
- " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
- kind, n, size >> 10, sdomain, ddomain, time,
+ s64 throughput = (n * (size >> 10));
+
+ throughput = div64_s64(throughput, time_ms);
+
+ dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
+ " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
+ kind, n, size >> 10, sdomain, ddomain, time_ms,
throughput * 8, throughput);
}
-static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
- unsigned sdomain, unsigned ddomain)
+static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
+ unsigned sdomain, unsigned ddomain)
{
struct amdgpu_bo *dobj = NULL;
struct amdgpu_bo *sobj = NULL;
- struct amdgpu_bo_param bp;
uint64_t saddr, daddr;
+ s64 time_ms;
int r, n;
- int time;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = sdomain;
- bp.flags = 0;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
+
n = AMDGPU_BENCHMARK_ITERATIONS;
- r = amdgpu_bo_create(adev, &bp, &sobj);
- if (r) {
- goto out_cleanup;
- }
- r = amdgpu_bo_reserve(sobj, false);
- if (unlikely(r != 0))
- goto out_cleanup;
- r = amdgpu_bo_pin(sobj, sdomain);
- if (r) {
- amdgpu_bo_unreserve(sobj);
- goto out_cleanup;
- }
- r = amdgpu_ttm_alloc_gart(&sobj->tbo);
- amdgpu_bo_unreserve(sobj);
- if (r) {
- goto out_cleanup;
- }
- saddr = amdgpu_bo_gpu_offset(sobj);
- bp.domain = ddomain;
- r = amdgpu_bo_create(adev, &bp, &dobj);
- if (r) {
- goto out_cleanup;
- }
- r = amdgpu_bo_reserve(dobj, false);
- if (unlikely(r != 0))
- goto out_cleanup;
- r = amdgpu_bo_pin(dobj, ddomain);
- if (r) {
- amdgpu_bo_unreserve(sobj);
+
+ r = amdgpu_bo_create_kernel(adev, size,
+ PAGE_SIZE, sdomain,
+ &sobj,
+ &saddr,
+ NULL);
+ if (r)
goto out_cleanup;
- }
- r = amdgpu_ttm_alloc_gart(&dobj->tbo);
- amdgpu_bo_unreserve(dobj);
- if (r) {
+ r = amdgpu_bo_create_kernel(adev, size,
+ PAGE_SIZE, ddomain,
+ &dobj,
+ &daddr,
+ NULL);
+ if (r)
goto out_cleanup;
- }
- daddr = amdgpu_bo_gpu_offset(dobj);
if (adev->mman.buffer_funcs) {
- time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n);
- if (time < 0)
+ r = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n, &time_ms);
+ if (r)
goto out_cleanup;
- if (time > 0)
- amdgpu_benchmark_log_results(n, size, time,
+ else
+ amdgpu_benchmark_log_results(adev, n, size, time_ms,
sdomain, ddomain, "dma");
}
out_cleanup:
/* Check error value now. The value can be overwritten when clean up.*/
- if (r) {
- DRM_ERROR("Error while benchmarking BO move.\n");
- }
+ if (r < 0)
+ dev_info(adev->dev, "Error while benchmarking BO move.\n");
- if (sobj) {
- r = amdgpu_bo_reserve(sobj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(sobj);
- amdgpu_bo_unreserve(sobj);
- }
- amdgpu_bo_unref(&sobj);
- }
- if (dobj) {
- r = amdgpu_bo_reserve(dobj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(dobj);
- amdgpu_bo_unreserve(dobj);
- }
- amdgpu_bo_unref(&dobj);
- }
+ if (sobj)
+ amdgpu_bo_free_kernel(&sobj, &saddr, NULL);
+ if (dobj)
+ amdgpu_bo_free_kernel(&dobj, &daddr, NULL);
+ return r;
}
-void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
+int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
{
- int i;
+ int i, r;
static const int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = {
640 * 480 * 4,
720 * 480 * 4,
@@ -180,63 +142,119 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
1920 * 1200 * 4
};
+ mutex_lock(&adev->benchmark_mutex);
switch (test_number) {
case 1:
+ dev_info(adev->dev,
+ "benchmark test: %d (simple test, VRAM to GTT and GTT to VRAM)\n",
+ test_number);
/* simple test, VRAM to GTT and GTT to VRAM */
- amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_GTT);
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
break;
case 2:
+ dev_info(adev->dev,
+ "benchmark test: %d (simple test, VRAM to VRAM)\n",
+ test_number);
/* simple test, VRAM to VRAM */
- amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_VRAM);
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
break;
case 3:
+ dev_info(adev->dev,
+ "benchmark test: %d (GTT to VRAM, buffer size sweep, powers of 2)\n",
+ test_number);
/* GTT to VRAM, buffer size sweep, powers of 2 */
- for (i = 1; i <= 16384; i <<= 1)
- amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_DOMAIN_VRAM);
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
break;
case 4:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to GTT, buffer size sweep, powers of 2)\n",
+ test_number);
/* VRAM to GTT, buffer size sweep, powers of 2 */
- for (i = 1; i <= 16384; i <<= 1)
- amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_GTT);
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ }
break;
case 5:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to VRAM, buffer size sweep, powers of 2)\n",
+ test_number);
/* VRAM to VRAM, buffer size sweep, powers of 2 */
- for (i = 1; i <= 16384; i <<= 1)
- amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_VRAM);
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
break;
case 6:
+ dev_info(adev->dev,
+ "benchmark test: %d (GTT to VRAM, buffer size sweep, common modes)\n",
+ test_number);
/* GTT to VRAM, buffer size sweep, common modes */
- for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
- amdgpu_benchmark_move(adev, common_modes[i],
- AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_DOMAIN_VRAM);
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
break;
case 7:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to GTT, buffer size sweep, common modes)\n",
+ test_number);
/* VRAM to GTT, buffer size sweep, common modes */
- for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
- amdgpu_benchmark_move(adev, common_modes[i],
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_GTT);
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ }
break;
case 8:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to VRAM, buffer size sweep, common modes)\n",
+ test_number);
/* VRAM to VRAM, buffer size sweep, common modes */
- for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
- amdgpu_benchmark_move(adev, common_modes[i],
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
break;
default:
- DRM_ERROR("Unknown benchmark\n");
+ dev_info(adev->dev, "Unknown benchmark %d\n", test_number);
+ r = -EINVAL;
+ break;
}
+
+done:
+ mutex_unlock(&adev->benchmark_mutex);
+
+ return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index cfb1a9a04477..35d04e69aec0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "atom.h"
+#include <linux/device.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/acpi.h>
@@ -46,75 +47,104 @@
/* Check if current bios is an ATOM BIOS.
* Return true if it is ATOM BIOS. Otherwise, return false.
*/
-static bool check_atom_bios(uint8_t *bios, size_t size)
+static bool check_atom_bios(struct amdgpu_device *adev, size_t size)
{
uint16_t tmp, bios_header_start;
+ uint8_t *bios = adev->bios;
if (!bios || size < 0x49) {
- DRM_INFO("vbios mem is null or mem size is wrong\n");
+ dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n");
return false;
}
if (!AMD_IS_VALID_VBIOS(bios)) {
- DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]);
+ dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0],
+ bios[1]);
return false;
}
bios_header_start = bios[0x48] | (bios[0x49] << 8);
if (!bios_header_start) {
- DRM_INFO("Can't locate bios header\n");
+ dev_dbg(adev->dev, "Can't locate VBIOS header\n");
return false;
}
tmp = bios_header_start + 4;
if (size < tmp) {
- DRM_INFO("BIOS header is broken\n");
+ dev_dbg(adev->dev, "VBIOS header is broken\n");
return false;
}
if (!memcmp(bios + tmp, "ATOM", 4) ||
!memcmp(bios + tmp, "MOTA", 4)) {
- DRM_DEBUG("ATOMBIOS detected\n");
+ dev_dbg(adev->dev, "ATOMBIOS detected\n");
return true;
}
return false;
}
+void amdgpu_bios_release(struct amdgpu_device *adev)
+{
+ kfree(adev->bios);
+ adev->bios = NULL;
+ adev->bios_size = 0;
+}
+
/* If you boot an IGP board with a discrete card as the primary,
* the IGP rom is not accessible via the rom bar as the IGP rom is
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
+ * For SR-IOV, if dynamic critical region is not enabled,
+ * the vbios image is also put at the start of VRAM in the VF.
*/
-static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
+static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
- uint8_t __iomem *bios;
+ uint8_t __iomem *bios = NULL;
resource_size_t vram_base;
- resource_size_t size = 256 * 1024; /* ??? */
+ u32 size = 256U * 1024U; /* ??? */
if (!(adev->flags & AMD_IS_APU))
if (amdgpu_device_need_post(adev))
return false;
+ /* FB BAR not enabled */
+ if (pci_resource_len(adev->pdev, 0) == 0)
+ return false;
+
adev->bios = NULL;
vram_base = pci_resource_start(adev->pdev, 0);
- bios = ioremap_wc(vram_base, size);
- if (!bios) {
- return false;
- }
adev->bios = kmalloc(size, GFP_KERNEL);
- if (!adev->bios) {
- iounmap(bios);
+ if (!adev->bios)
return false;
+
+ /* For SRIOV with dynamic critical region is enabled,
+ * the vbios image is put at a dynamic offset of VRAM in the VF.
+ * If dynamic critical region is disabled, follow the existing logic as on baremetal.
+ */
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+ } else {
+ bios = ioremap_wc(vram_base, size);
+ if (!bios) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+
+ memcpy_fromio(adev->bios, bios, size);
+ iounmap(bios);
}
+
adev->bios_size = size;
- memcpy_fromio(adev->bios, bios, size);
- iounmap(bios);
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -129,9 +159,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
adev->bios = NULL;
/* XXX: some cards may return 0 for rom size? ddx has a workaround */
bios = pci_map_rom(adev->pdev, &size);
- if (!bios) {
+ if (!bios)
return false;
- }
adev->bios = kzalloc(size, GFP_KERNEL);
if (adev->bios == NULL) {
@@ -142,8 +171,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, size);
pci_unmap_rom(adev->pdev, bios);
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -164,9 +193,9 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
header[AMD_VBIOS_SIGNATURE_END] = 0;
if ((!AMD_IS_VALID_VBIOS(header)) ||
- 0 != memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
- AMD_VBIOS_SIGNATURE,
- strlen(AMD_VBIOS_SIGNATURE)))
+ memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
+ AMD_VBIOS_SIGNATURE,
+ strlen(AMD_VBIOS_SIGNATURE)) != 0)
return false;
/* valid vbios, go on */
@@ -182,8 +211,8 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
/* read complete BIOS */
amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
- if (!check_atom_bios(adev->bios, len)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, len)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -212,14 +241,15 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, romlen);
iounmap(bios);
- if (!check_atom_bios(adev->bios, romlen))
+ if (!check_atom_bios(adev, romlen))
goto free_bios;
adev->bios_size = romlen;
return true;
free_bios:
- kfree(adev->bios);
+ amdgpu_bios_release(adev);
+
return false;
}
@@ -260,7 +290,7 @@ static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
if (ACPI_FAILURE(status)) {
- printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
+ DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
return -ENODEV;
}
@@ -281,11 +311,15 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
acpi_status status;
bool found = false;
- /* ATRM is for the discrete card only */
- if (adev->flags & AMD_IS_APU)
+ /* ATRM is for on-platform devices only */
+ if (dev_is_removable(&adev->pdev->dev))
return false;
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
+ if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) &&
+ (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8))
+ continue;
+
dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
continue;
@@ -297,26 +331,13 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
}
}
- if (!found) {
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
- dhandle = ACPI_HANDLE(&pdev->dev);
- if (!dhandle)
- continue;
-
- status = acpi_get_handle(dhandle, "ATRM", &atrm_handle);
- if (ACPI_SUCCESS(status)) {
- found = true;
- break;
- }
- }
- }
-
if (!found)
return false;
+ pci_dev_put(pdev);
adev->bios = kmalloc(size, GFP_KERNEL);
if (!adev->bios) {
- DRM_ERROR("Unable to allocate bios\n");
+ dev_err(adev->dev, "Unable to allocate bios\n");
return false;
}
@@ -329,8 +350,8 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
break;
}
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
adev->bios_size = size;
@@ -345,11 +366,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
{
- if (adev->flags & AMD_IS_APU)
- return igp_read_bios_from_vram(adev);
- else
- return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
- false : amdgpu_asic_read_disabled_bios(adev);
+ return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
+ false : amdgpu_asic_read_disabled_bios(adev);
}
#ifdef CONFIG_ACPI
@@ -358,13 +376,13 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
struct acpi_table_header *hdr;
acpi_size tbl_size;
UEFI_ACPI_VFCT *vfct;
- unsigned offset;
+ unsigned int offset;
if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
return false;
tbl_size = hdr->length;
if (tbl_size < sizeof(UEFI_ACPI_VFCT)) {
- DRM_ERROR("ACPI VFCT table present but broken (too short #1)\n");
+ dev_info(adev->dev, "ACPI VFCT table present but broken (too short #1),skipping\n");
return false;
}
@@ -377,13 +395,13 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
offset += sizeof(VFCT_IMAGE_HEADER);
if (offset > tbl_size) {
- DRM_ERROR("ACPI VFCT image header truncated\n");
+ dev_info(adev->dev, "ACPI VFCT image header truncated,skipping\n");
return false;
}
offset += vhdr->ImageLength;
if (offset > tbl_size) {
- DRM_ERROR("ACPI VFCT image truncated\n");
+ dev_info(adev->dev, "ACPI VFCT image truncated,skipping\n");
return false;
}
@@ -397,8 +415,8 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
vhdr->ImageLength,
GFP_KERNEL);
- if (!check_atom_bios(adev->bios, vhdr->ImageLength)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, vhdr->ImageLength)) {
+ amdgpu_bios_release(adev);
return false;
}
adev->bios_size = vhdr->ImageLength;
@@ -406,7 +424,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
}
}
- DRM_ERROR("ACPI VFCT table present but broken (too short #2)\n");
+ dev_info(adev->dev, "ACPI VFCT table present but broken (too short #2),skipping\n");
return false;
}
#else
@@ -416,7 +434,43 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
}
#endif
-bool amdgpu_get_bios(struct amdgpu_device *adev)
+static bool amdgpu_get_bios_apu(struct amdgpu_device *adev)
+{
+ if (amdgpu_acpi_vfct_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios_from_vram(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
+ return false;
+
+success:
+ return true;
+}
+
+static bool amdgpu_prefer_rom_resource(struct amdgpu_device *adev)
+{
+ struct resource *res = &adev->pdev->resource[PCI_ROM_RESOURCE];
+
+ return (res->flags & IORESOURCE_ROM_SHADOW);
+}
+
+static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from ATRM\n");
@@ -428,14 +482,33 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}
- if (igp_read_bios_from_vram(adev)) {
+ /* this is required for SR-IOV */
+ if (amdgpu_read_bios_from_vram(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
goto success;
}
- if (amdgpu_read_bios(adev)) {
- dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
- goto success;
+ if (amdgpu_prefer_rom_resource(adev)) {
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ } else {
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
}
if (amdgpu_read_bios_from_rom(adev)) {
@@ -448,15 +521,71 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}
- if (amdgpu_read_platform_bios(adev)) {
- dev_info(adev->dev, "Fetched VBIOS from platform\n");
- goto success;
- }
-
- DRM_ERROR("Unable to locate a BIOS ROM\n");
+ dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
return false;
success:
- adev->is_atom_fw = (adev->asic_type >= CHIP_VEGA10) ? true : false;
+ return true;
+}
+
+bool amdgpu_get_bios(struct amdgpu_device *adev)
+{
+ bool found;
+
+ if (adev->flags & AMD_IS_APU)
+ found = amdgpu_get_bios_apu(adev);
+ else
+ found = amdgpu_get_bios_dgpu(adev);
+
+ if (found)
+ adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
+
+ return found;
+}
+
+/* helper function for soc15 and onwards to read bios from rom */
+bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ u32 *dw_ptr;
+ u32 i, length_dw;
+ u32 rom_offset;
+ u32 rom_index_offset;
+ u32 rom_data_offset;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (!adev->smuio.funcs ||
+ !adev->smuio.funcs->get_rom_index_offset ||
+ !adev->smuio.funcs->get_rom_data_offset)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+
+ rom_index_offset =
+ adev->smuio.funcs->get_rom_index_offset(adev);
+ rom_data_offset =
+ adev->smuio.funcs->get_rom_data_offset(adev);
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_rom_offset) {
+ rom_offset = adev->nbio.funcs->get_rom_offset(adev);
+ rom_offset = rom_offset << 17;
+ } else {
+ rom_offset = 0;
+ }
+
+ /* set rom index to rom_offset */
+ WREG32(rom_index_offset, rom_offset);
+ /* read out the rom data */
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(rom_data_offset);
+
return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 15c45b2a3983..66fb37b64388 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -28,6 +28,7 @@
* Christian König <deathsimple@vodafone.de>
*/
+#include <linux/sort.h>
#include <linux/uaccess.h>
#include "amdgpu.h"
@@ -40,7 +41,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
{
struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
rhead);
-
+ mutex_destroy(&list->bo_list_mutex);
kvfree(list);
}
@@ -50,44 +51,41 @@ static void amdgpu_bo_list_free(struct kref *ref)
refcount);
struct amdgpu_bo_list_entry *e;
- amdgpu_bo_list_for_each_entry(e, list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ amdgpu_bo_list_for_each_entry(e, list)
+ amdgpu_bo_unref(&e->bo);
+ call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+}
- amdgpu_bo_unref(&bo);
- }
+static int amdgpu_bo_list_entry_cmp(const void *_a, const void *_b)
+{
+ const struct amdgpu_bo_list_entry *a = _a, *b = _b;
- call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+ if (a->priority > b->priority)
+ return 1;
+ if (a->priority < b->priority)
+ return -1;
+ return 0;
}
int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
- unsigned num_entries, struct amdgpu_bo_list **result)
+ size_t num_entries, struct amdgpu_bo_list **result)
{
unsigned last_entry = 0, first_userptr = num_entries;
struct amdgpu_bo_list_entry *array;
struct amdgpu_bo_list *list;
uint64_t total_size = 0;
- size_t size;
unsigned i;
int r;
- if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
- / sizeof(struct amdgpu_bo_list_entry))
- return -EINVAL;
-
- size = sizeof(struct amdgpu_bo_list);
- size += num_entries * sizeof(struct amdgpu_bo_list_entry);
- list = kvmalloc(size, GFP_KERNEL);
+ list = kvzalloc(struct_size(list, entries, num_entries), GFP_KERNEL);
if (!list)
return -ENOMEM;
kref_init(&list->refcount);
- list->gds_obj = NULL;
- list->gws_obj = NULL;
- list->oa_obj = NULL;
- array = amdgpu_bo_list_array_entry(list, 0);
- memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
+ list->num_entries = num_entries;
+ array = list->entries;
for (i = 0; i < num_entries; ++i) {
struct amdgpu_bo_list_entry *entry;
@@ -118,7 +116,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY);
- entry->tv.bo = &bo->tbo;
+ entry->bo = bo;
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = bo;
@@ -132,24 +130,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
}
list->first_userptr = first_userptr;
- list->num_entries = num_entries;
+ sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
+ amdgpu_bo_list_entry_cmp, NULL);
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
+ mutex_init(&list->bo_list_mutex);
*result = list;
return 0;
error_free:
- for (i = 0; i < last_entry; ++i) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
-
- amdgpu_bo_unref(&bo);
- }
- for (i = first_userptr; i < num_entries; ++i) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
-
- amdgpu_bo_unref(&bo);
- }
+ for (i = 0; i < last_entry; ++i)
+ amdgpu_bo_unref(&array[i].bo);
+ for (i = first_userptr; i < num_entries; ++i)
+ amdgpu_bo_unref(&array[i].bo);
kvfree(list);
return r;
@@ -178,43 +172,10 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
}
rcu_read_unlock();
+ *result = NULL;
return -ENOENT;
}
-void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
- struct list_head *validated)
-{
- /* This is based on the bucket sort with O(n) time complexity.
- * An item with priority "i" is added to bucket[i]. The lists are then
- * concatenated in descending order.
- */
- struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
- struct amdgpu_bo_list_entry *e;
- unsigned i;
-
- for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
- INIT_LIST_HEAD(&bucket[i]);
-
- /* Since buffers which appear sooner in the relocation list are
- * likely to be used more often than buffers which appear later
- * in the list, the sort mustn't change the ordering of buffers
- * with the same priority, i.e. it must be stable.
- */
- amdgpu_bo_list_for_each_entry(e, list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- unsigned priority = e->priority;
-
- if (!bo->parent)
- list_add_tail(&e->tv.head, &bucket[priority]);
-
- e->user_pages = NULL;
- }
-
- /* Connect the sorted buckets in the output list. */
- for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
- list_splice(&bucket[i], validated);
-}
-
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
kref_put(&list->refcount, amdgpu_bo_list_free);
@@ -223,43 +184,36 @@ void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
struct drm_amdgpu_bo_list_entry **info_param)
{
- const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
+ const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+ const uint32_t bo_info_size = in->bo_info_size;
+ const uint32_t bo_number = in->bo_number;
struct drm_amdgpu_bo_list_entry *info;
- int r;
-
- info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
- if (!info)
- return -ENOMEM;
/* copy the handle array from userspace to a kernel buffer */
- r = -EFAULT;
- if (likely(info_size == in->bo_info_size)) {
- unsigned long bytes = in->bo_number *
- in->bo_info_size;
-
- if (copy_from_user(info, uptr, bytes))
- goto error_free;
-
+ if (likely(info_size == bo_info_size)) {
+ info = vmemdup_array_user(uptr, bo_number, info_size);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
} else {
- unsigned long bytes = min(in->bo_info_size, info_size);
+ const uint32_t bytes = min(bo_info_size, info_size);
unsigned i;
- memset(info, 0, in->bo_number * info_size);
- for (i = 0; i < in->bo_number; ++i) {
- if (copy_from_user(&info[i], uptr, bytes))
- goto error_free;
+ info = kvmalloc_array(bo_number, info_size, GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
- uptr += in->bo_info_size;
+ memset(info, 0, bo_number * info_size);
+ for (i = 0; i < bo_number; ++i, uptr += bo_info_size) {
+ if (copy_from_user(&info[i], uptr, bytes)) {
+ kvfree(info);
+ return -EFAULT;
+ }
}
}
*info_param = info;
return 0;
-
-error_free:
- kvfree(info);
- return r;
}
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index a130e766cbdb..2b5e7c46a39d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -23,19 +23,22 @@
#ifndef __AMDGPU_BO_LIST_H__
#define __AMDGPU_BO_LIST_H__
-#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/amdgpu_drm.h>
+struct hmm_range;
+
+struct drm_file;
+
struct amdgpu_device;
struct amdgpu_bo;
struct amdgpu_bo_va;
struct amdgpu_fpriv;
struct amdgpu_bo_list_entry {
- struct ttm_validate_buffer tv;
+ struct amdgpu_bo *bo;
struct amdgpu_bo_va *bo_va;
uint32_t priority;
- struct page **user_pages;
+ struct amdgpu_hmm_range *range;
bool user_invalidated;
};
@@ -47,12 +50,16 @@ struct amdgpu_bo_list {
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
+
+ /* Protect access during command submission.
+ */
+ struct mutex bo_list_mutex;
+
+ struct amdgpu_bo_list_entry entries[] __counted_by(num_entries);
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
struct amdgpu_bo_list **result);
-void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
- struct list_head *validated);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
struct drm_amdgpu_bo_list_entry **info_param);
@@ -60,25 +67,17 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
int amdgpu_bo_list_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
- unsigned num_entries,
+ size_t num_entries,
struct amdgpu_bo_list **list);
-static inline struct amdgpu_bo_list_entry *
-amdgpu_bo_list_array_entry(struct amdgpu_bo_list *list, unsigned index)
-{
- struct amdgpu_bo_list_entry *array = (void *)&list[1];
-
- return &array[index];
-}
-
#define amdgpu_bo_list_for_each_entry(e, list) \
- for (e = amdgpu_bo_list_array_entry(list, 0); \
- e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ for (e = list->entries; \
+ e != &list->entries[list->num_entries]; \
++e)
#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
- for (e = amdgpu_bo_list_array_entry(list, (list)->first_userptr); \
- e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ for (e = &list->entries[list->first_userptr]; \
+ e != &list->entries[list->num_entries]; \
++e)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index f1a050379190..004a6a9d6b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -41,13 +41,13 @@ struct amdgpu_cgs_device {
((struct amdgpu_cgs_device *)cgs_device)->adev
-static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
+static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned int offset)
{
CGS_FUNC_ADEV;
return RREG32(offset);
}
-static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned offset,
+static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned int offset,
uint32_t value)
{
CGS_FUNC_ADEV;
@@ -56,7 +56,7 @@ static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned of
static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
enum cgs_ind_reg space,
- unsigned index)
+ unsigned int index)
{
CGS_FUNC_ADEV;
switch (space) {
@@ -84,7 +84,7 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
enum cgs_ind_reg space,
- unsigned index, uint32_t value)
+ unsigned int index, uint32_t value)
{
CGS_FUNC_ADEV;
switch (space) {
@@ -163,38 +163,38 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
uint16_t fw_version = 0;
switch (type) {
- case CGS_UCODE_ID_SDMA0:
- fw_version = adev->sdma.instance[0].fw_version;
- break;
- case CGS_UCODE_ID_SDMA1:
- fw_version = adev->sdma.instance[1].fw_version;
- break;
- case CGS_UCODE_ID_CP_CE:
- fw_version = adev->gfx.ce_fw_version;
- break;
- case CGS_UCODE_ID_CP_PFP:
- fw_version = adev->gfx.pfp_fw_version;
- break;
- case CGS_UCODE_ID_CP_ME:
- fw_version = adev->gfx.me_fw_version;
- break;
- case CGS_UCODE_ID_CP_MEC:
- fw_version = adev->gfx.mec_fw_version;
- break;
- case CGS_UCODE_ID_CP_MEC_JT1:
- fw_version = adev->gfx.mec_fw_version;
- break;
- case CGS_UCODE_ID_CP_MEC_JT2:
- fw_version = adev->gfx.mec_fw_version;
- break;
- case CGS_UCODE_ID_RLC_G:
- fw_version = adev->gfx.rlc_fw_version;
- break;
- case CGS_UCODE_ID_STORAGE:
- break;
- default:
- DRM_ERROR("firmware type %d do not have version\n", type);
- break;
+ case CGS_UCODE_ID_SDMA0:
+ fw_version = adev->sdma.instance[0].fw_version;
+ break;
+ case CGS_UCODE_ID_SDMA1:
+ fw_version = adev->sdma.instance[1].fw_version;
+ break;
+ case CGS_UCODE_ID_CP_CE:
+ fw_version = adev->gfx.ce_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_PFP:
+ fw_version = adev->gfx.pfp_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_ME:
+ fw_version = adev->gfx.me_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT1:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT2:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_RLC_G:
+ fw_version = adev->gfx.rlc_fw_version;
+ break;
+ case CGS_UCODE_ID_STORAGE:
+ break;
+ default:
+ DRM_ERROR("firmware type %d do not have version\n", type);
+ break;
}
return fw_version;
}
@@ -205,7 +205,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
{
CGS_FUNC_ADEV;
- if ((CGS_UCODE_ID_SMU != type) && (CGS_UCODE_ID_SMU_SK != type)) {
+ if (type != CGS_UCODE_ID_SMU && type != CGS_UCODE_ID_SMU_SK) {
uint64_t gpu_addr;
uint32_t data_size;
const struct gfx_firmware_header_v1_0 *header;
@@ -213,6 +213,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
struct amdgpu_firmware_info *ucode;
id = fw_type_convert(cgs_device, type);
+ if (id >= AMDGPU_UCODE_ID_MAXIMUM)
+ return -EINVAL;
+
ucode = &adev->firmware.ucode[id];
if (ucode->fw == NULL)
return -EINVAL;
@@ -232,7 +235,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
info->mc_addr = gpu_addr;
info->version = (uint16_t)le32_to_cpu(header->header.ucode_version);
- if (CGS_UCODE_ID_CP_MEC == type)
+ if (type == CGS_UCODE_ID_CP_MEC)
info->image_size = le32_to_cpu(header->jt_offset) << 2;
info->fw_version = amdgpu_get_firmware_version(cgs_device, type);
@@ -249,83 +252,22 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
if (!adev->pm.fw) {
switch (adev->asic_type) {
- case CHIP_TAHITI:
- strcpy(fw_name, "radeon/tahiti_smc.bin");
- break;
- case CHIP_PITCAIRN:
- if ((adev->pdev->revision == 0x81) &&
- ((adev->pdev->device == 0x6810) ||
- (adev->pdev->device == 0x6811))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/pitcairn_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/pitcairn_smc.bin");
- }
- break;
- case CHIP_VERDE:
- if (((adev->pdev->device == 0x6820) &&
- ((adev->pdev->revision == 0x81) ||
- (adev->pdev->revision == 0x83))) ||
- ((adev->pdev->device == 0x6821) &&
- ((adev->pdev->revision == 0x83) ||
- (adev->pdev->revision == 0x87))) ||
- ((adev->pdev->revision == 0x87) &&
- ((adev->pdev->device == 0x6823) ||
- (adev->pdev->device == 0x682b)))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/verde_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/verde_smc.bin");
- }
- break;
- case CHIP_OLAND:
- if (((adev->pdev->revision == 0x81) &&
- ((adev->pdev->device == 0x6600) ||
- (adev->pdev->device == 0x6604) ||
- (adev->pdev->device == 0x6605) ||
- (adev->pdev->device == 0x6610))) ||
- ((adev->pdev->revision == 0x83) &&
- (adev->pdev->device == 0x6610))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/oland_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/oland_smc.bin");
- }
- break;
- case CHIP_HAINAN:
- if (((adev->pdev->revision == 0x81) &&
- (adev->pdev->device == 0x6660)) ||
- ((adev->pdev->revision == 0x83) &&
- ((adev->pdev->device == 0x6660) ||
- (adev->pdev->device == 0x6663) ||
- (adev->pdev->device == 0x6665) ||
- (adev->pdev->device == 0x6667)))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/hainan_k_smc.bin");
- } else if ((adev->pdev->revision == 0xc3) &&
- (adev->pdev->device == 0x6665)) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/banks_k_2_smc.bin");
- } else {
- strcpy(fw_name, "radeon/hainan_smc.bin");
- }
- break;
case CHIP_BONAIRE:
if ((adev->pdev->revision == 0x80) ||
(adev->pdev->revision == 0x81) ||
(adev->pdev->device == 0x665f)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
+ strscpy(fw_name, "amdgpu/bonaire_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/bonaire_smc.bin");
+ strscpy(fw_name, "amdgpu/bonaire_smc.bin");
}
break;
case CHIP_HAWAII:
if (adev->pdev->revision == 0x80) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
+ strscpy(fw_name, "amdgpu/hawaii_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/hawaii_smc.bin");
+ strscpy(fw_name, "amdgpu/hawaii_smc.bin");
}
break;
case CHIP_TOPAZ:
@@ -335,93 +277,88 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
+ strscpy(fw_name, "amdgpu/topaz_k_smc.bin");
} else
- strcpy(fw_name, "amdgpu/topaz_smc.bin");
+ strscpy(fw_name, "amdgpu/topaz_smc.bin");
break;
case CHIP_TONGA:
if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/tonga_k_smc.bin");
+ strscpy(fw_name, "amdgpu/tonga_k_smc.bin");
} else
- strcpy(fw_name, "amdgpu/tonga_smc.bin");
+ strscpy(fw_name, "amdgpu/tonga_smc.bin");
break;
case CHIP_FIJI:
- strcpy(fw_name, "amdgpu/fiji_smc.bin");
+ strscpy(fw_name, "amdgpu/fiji_smc.bin");
break;
case CHIP_POLARIS11:
if (type == CGS_UCODE_ID_SMU) {
if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_k_smc.bin");
} else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris11_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
- strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
+ strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
}
break;
case CHIP_POLARIS10:
if (type == CGS_UCODE_ID_SMU) {
if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_k_smc.bin");
} else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris10_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
- strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
+ strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
}
break;
case CHIP_POLARIS12:
if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris12_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris12_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris12_smc.bin");
}
break;
case CHIP_VEGAM:
- strcpy(fw_name, "amdgpu/vegam_smc.bin");
+ strscpy(fw_name, "amdgpu/vegam_smc.bin");
break;
case CHIP_VEGA10:
if ((adev->pdev->device == 0x687f) &&
((adev->pdev->revision == 0xc0) ||
(adev->pdev->revision == 0xc1) ||
(adev->pdev->revision == 0xc3)))
- strcpy(fw_name, "amdgpu/vega10_acg_smc.bin");
+ strscpy(fw_name, "amdgpu/vega10_acg_smc.bin");
else
- strcpy(fw_name, "amdgpu/vega10_smc.bin");
+ strscpy(fw_name, "amdgpu/vega10_smc.bin");
break;
case CHIP_VEGA12:
- strcpy(fw_name, "amdgpu/vega12_smc.bin");
+ strscpy(fw_name, "amdgpu/vega12_smc.bin");
break;
case CHIP_VEGA20:
- strcpy(fw_name, "amdgpu/vega20_smc.bin");
+ strscpy(fw_name, "amdgpu/vega20_smc.bin");
break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
}
- err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
- if (err) {
- DRM_ERROR("Failed to request firmware\n");
- return err;
- }
-
- err = amdgpu_ucode_validate(adev->pm.fw);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw,
+ AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (err) {
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
- release_firmware(adev->pm.fw);
- adev->pm.fw = NULL;
+ amdgpu_ucode_release(&adev->pm.fw);
return err;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index b9c11c2b2885..9f96d568acf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -24,9 +24,10 @@
* Alex Deucher
*/
+#include <drm/display/drm_dp_helper.h>
+#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_dp_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
@@ -102,13 +103,13 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *dig_connector;
int bpc = 8;
- unsigned mode_clock, max_tmds_clock;
+ unsigned int mode_clock, max_tmds_clock;
switch (connector->connector_type) {
case DRM_MODE_CONNECTOR_DVII:
case DRM_MODE_CONNECTOR_HDMIB:
if (amdgpu_connector->use_digital) {
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
if (connector->display_info.bpc)
bpc = connector->display_info.bpc;
}
@@ -116,7 +117,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
break;
case DRM_MODE_CONNECTOR_DVID:
case DRM_MODE_CONNECTOR_HDMIA:
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
if (connector->display_info.bpc)
bpc = connector->display_info.bpc;
}
@@ -125,7 +126,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
dig_connector = amdgpu_connector->con_priv;
if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
(dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) ||
- drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ connector->display_info.is_hdmi) {
if (connector->display_info.bpc)
bpc = connector->display_info.bpc;
}
@@ -149,7 +150,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
break;
}
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
/*
* Pre DCE-8 hw can't handle > 12 bpc, and more than 12 bpc doesn't make
* much sense without support for > 12 bpc framebuffers. RGB 4:4:4 at
@@ -175,7 +176,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
/* Check if bpc is within clock limit. Try to degrade gracefully otherwise */
if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) {
- if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) &&
+ if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) &&
(mode_clock * 5/4 <= max_tmds_clock))
bpc = 10;
else
@@ -245,36 +246,10 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
return NULL;
}
-struct edid *amdgpu_connector_edid(struct drm_connector *connector)
-{
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- struct drm_property_blob *edid_blob = connector->edid_blob_ptr;
-
- if (amdgpu_connector->edid) {
- return amdgpu_connector->edid;
- } else if (edid_blob) {
- struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL);
- if (edid)
- amdgpu_connector->edid = edid;
- }
- return amdgpu_connector->edid;
-}
-
static struct edid *
amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev)
{
- struct edid *edid;
-
- if (adev->mode_info.bios_hardcoded_edid) {
- edid = kmalloc(adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL);
- if (edid) {
- memcpy((unsigned char *)edid,
- (unsigned char *)adev->mode_info.bios_hardcoded_edid,
- adev->mode_info.bios_hardcoded_edid_size);
- return edid;
- }
- }
- return NULL;
+ return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid));
}
static void amdgpu_connector_get_edid(struct drm_connector *connector)
@@ -315,8 +290,10 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector)
if (!amdgpu_connector->edid) {
/* some laptops provide a hardcoded edid in rom for LCDs */
if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) ||
- (connector->connector_type == DRM_MODE_CONNECTOR_eDP)))
+ (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) {
amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev);
+ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
+ }
}
}
@@ -387,6 +364,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder)
native_mode->vdisplay != 0 &&
native_mode->clock != 0) {
mode = drm_mode_duplicate(dev, native_mode);
+ if (!mode)
+ return NULL;
+
mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
drm_mode_set_name(mode);
@@ -401,6 +381,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder)
* simpler.
*/
mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
+ if (!mode)
+ return NULL;
+
mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
}
@@ -415,30 +398,28 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
struct drm_display_mode *mode = NULL;
struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
int i;
- static const struct mode_size {
+ int n;
+ struct mode_size {
+ char name[DRM_DISPLAY_MODE_LEN];
int w;
int h;
- } common_modes[17] = {
- { 640, 480},
- { 720, 480},
- { 800, 600},
- { 848, 480},
- {1024, 768},
- {1152, 768},
- {1280, 720},
- {1280, 800},
- {1280, 854},
- {1280, 960},
- {1280, 1024},
- {1440, 900},
- {1400, 1050},
- {1680, 1050},
- {1600, 1200},
- {1920, 1080},
- {1920, 1200}
+ } common_modes[] = {
+ { "640x480", 640, 480},
+ { "800x600", 800, 600},
+ { "1024x768", 1024, 768},
+ { "1280x720", 1280, 720},
+ { "1280x800", 1280, 800},
+ {"1280x1024", 1280, 1024},
+ { "1440x900", 1440, 900},
+ {"1680x1050", 1680, 1050},
+ {"1600x1200", 1600, 1200},
+ {"1920x1080", 1920, 1080},
+ {"1920x1200", 1920, 1200}
};
- for (i = 0; i < 17; i++) {
+ n = ARRAY_SIZE(common_modes);
+
+ for (i = 0; i < n; i++) {
if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
if (common_modes[i].w > 1024 ||
common_modes[i].h > 768)
@@ -451,10 +432,12 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
common_modes[i].h == native_mode->vdisplay))
continue;
}
- if (common_modes[i].w < 320 || common_modes[i].h < 200)
- continue;
mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ return;
+ strscpy(mode->name, common_modes[i].name, DRM_DISPLAY_MODE_LEN);
+
drm_mode_probed_add(connector, mode);
}
}
@@ -579,16 +562,26 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
} else {
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
}
switch (val) {
default:
- case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break;
- case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break;
- case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break;
- case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break;
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
}
+
if (amdgpu_encoder->rmx_type == rmx_type)
return 0;
@@ -617,7 +610,7 @@ amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder,
if (mode->type & DRM_MODE_TYPE_PREFERRED) {
if (mode->hdisplay != native_mode->hdisplay ||
mode->vdisplay != native_mode->vdisplay)
- memcpy(native_mode, mode, sizeof(*mode));
+ drm_mode_copy(native_mode, mode);
}
}
@@ -626,7 +619,7 @@ amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder,
list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
if (mode->hdisplay == native_mode->hdisplay &&
mode->vdisplay == native_mode->vdisplay) {
- *native_mode = *mode;
+ drm_mode_copy(native_mode, mode);
drm_mode_set_crtcinfo(native_mode, CRTC_INTERLACE_HALVE_V);
DRM_DEBUG_KMS("Determined LVDS native mode details from EDID\n");
break;
@@ -678,7 +671,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
}
static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@@ -741,10 +734,8 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -786,16 +777,26 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
else {
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
}
switch (value) {
- case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break;
- case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break;
- case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break;
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
default:
- case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break;
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
}
+
if (amdgpu_encoder->rmx_type == rmx_type)
return 0;
@@ -827,12 +828,13 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
amdgpu_connector_get_edid(connector);
ret = amdgpu_connector_ddc_get_modes(connector);
+ amdgpu_get_native_mode(connector);
return ret;
}
static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -915,10 +917,8 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -958,6 +958,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector)
return false;
}
+static void amdgpu_connector_shared_ddc(enum drm_connector_status *status,
+ struct drm_connector *connector,
+ struct amdgpu_connector *amdgpu_connector)
+{
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *list_amdgpu_connector;
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (amdgpu_connector->shared_ddc && *status == connector_status_connected) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(list_connector,
+ &iter) {
+ if (connector == list_connector)
+ continue;
+ list_amdgpu_connector = to_amdgpu_connector(list_connector);
+ if (list_amdgpu_connector->shared_ddc &&
+ list_amdgpu_connector->ddc_bus->rec.i2c_id ==
+ amdgpu_connector->ddc_bus->rec.i2c_id) {
+ /* cases where both connectors are digital */
+ if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+ /* hpd is our only option in this case */
+ if (!amdgpu_display_hpd_sense(adev,
+ amdgpu_connector->hpd.hpd)) {
+ amdgpu_connector_free_edid(connector);
+ *status = connector_status_disconnected;
+ }
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ }
+}
+
/*
* DVI is complicated
* Do a DDC probe, if DDC probe passes, get the full EDID so
@@ -988,13 +1023,33 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
}
}
+ if (amdgpu_connector->detected_hpd_without_ddc) {
+ force = true;
+ amdgpu_connector->detected_hpd_without_ddc = false;
+ }
+
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
goto exit;
}
- if (amdgpu_connector->ddc_bus)
+ if (amdgpu_connector->ddc_bus) {
dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
+
+ /* Sometimes the pins required for the DDC probe on DVI
+ * connectors don't make contact at the same time that the ones
+ * for HPD do. If the DDC probe fails even though we had an HPD
+ * signal, try again later
+ */
+ if (!dret && !force &&
+ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+ DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n");
+ amdgpu_connector->detected_hpd_without_ddc = true;
+ schedule_delayed_work(&adev->hotplug_work,
+ msecs_to_jiffies(1000));
+ goto exit;
+ }
+ }
if (dret) {
amdgpu_connector->detected_by_load = false;
amdgpu_connector_free_edid(connector);
@@ -1024,32 +1079,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
* DDC line. The latter is more complex because with DVI<->HDMI adapters
* you don't really know what's connected to which port as both are digital.
*/
- if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
- struct drm_connector *list_connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *list_amdgpu_connector;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(list_connector,
- &iter) {
- if (connector == list_connector)
- continue;
- list_amdgpu_connector = to_amdgpu_connector(list_connector);
- if (list_amdgpu_connector->shared_ddc &&
- (list_amdgpu_connector->ddc_bus->rec.i2c_id ==
- amdgpu_connector->ddc_bus->rec.i2c_id)) {
- /* cases where both connectors are digital */
- if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
- /* hpd is our only option in this case */
- if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
- amdgpu_connector_free_edid(connector);
- ret = connector_status_disconnected;
- }
- }
- }
- }
- drm_connector_list_iter_end(&iter);
- }
+ amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
}
}
@@ -1097,7 +1127,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
/* assume digital unless load detected otherwise */
amdgpu_connector->use_digital = true;
lret = encoder_funcs->detect(encoder, connector);
- DRM_DEBUG_KMS("load_detect %x returned: %x\n",encoder->encoder_type,lret);
+ DRM_DEBUG_KMS("load_detect %x returned: %x\n",
+ encoder->encoder_type, lret);
if (lret == connector_status_connected)
amdgpu_connector->use_digital = false;
}
@@ -1111,10 +1142,8 @@ out:
amdgpu_connector_update_scratch_regs(connector, ret);
exit:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -1150,35 +1179,76 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
static void amdgpu_connector_dvi_force(struct drm_connector *connector)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
if (connector->force == DRM_FORCE_ON)
amdgpu_connector->use_digital = false;
if (connector->force == DRM_FORCE_ON_DIGITAL)
amdgpu_connector->use_digital = true;
}
+/**
+ * amdgpu_max_hdmi_pixel_clock - Return max supported HDMI (TMDS) pixel clock
+ * @adev: pointer to amdgpu_device
+ *
+ * Return: maximum supported HDMI (TMDS) pixel clock in KHz.
+ */
+static int amdgpu_max_hdmi_pixel_clock(const struct amdgpu_device *adev)
+{
+ if (adev->asic_type >= CHIP_POLARIS10)
+ return 600000;
+ else if (adev->asic_type >= CHIP_TONGA)
+ return 300000;
+ else
+ return 297000;
+}
+
+/**
+ * amdgpu_connector_dvi_mode_valid - Validate a mode on DVI/HDMI connectors
+ * @connector: DRM connector to validate the mode on
+ * @mode: display mode to validate
+ *
+ * Validate the given display mode on DVI and HDMI connectors, including
+ * analog signals on DVI-I.
+ *
+ * Return: drm_mode_status indicating whether the mode is valid.
+ */
static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ const int max_hdmi_pixel_clock = amdgpu_max_hdmi_pixel_clock(adev);
+ const int max_dvi_single_link_pixel_clock = 165000;
+ int max_digital_pixel_clock_khz;
/* XXX check mode bandwidth */
- if (amdgpu_connector->use_digital && (mode->clock > 165000)) {
- if ((amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I) ||
- (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) ||
- (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) {
- return MODE_OK;
- } else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
- /* HDMI 1.3+ supports max clock of 340 Mhz */
- if (mode->clock > 340000)
- return MODE_CLOCK_HIGH;
- else
- return MODE_OK;
- } else {
- return MODE_CLOCK_HIGH;
+ if (amdgpu_connector->use_digital) {
+ switch (amdgpu_connector->connector_object_id) {
+ case CONNECTOR_OBJECT_ID_HDMI_TYPE_A:
+ max_digital_pixel_clock_khz = max_hdmi_pixel_clock;
+ break;
+ case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I:
+ case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D:
+ max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock;
+ break;
+ case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I:
+ case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D:
+ case CONNECTOR_OBJECT_ID_HDMI_TYPE_B:
+ max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2;
+ break;
}
+
+ /* When the display EDID claims that it's an HDMI display,
+ * we use the HDMI encoder mode of the display HW,
+ * so we should verify against the max HDMI clock here.
+ */
+ if (connector->display_info.is_hdmi)
+ max_digital_pixel_clock_khz = max_hdmi_pixel_clock;
+
+ if (mode->clock > max_digital_pixel_clock_khz)
+ return MODE_CLOCK_HIGH;
}
/* check against the max pixel clock */
@@ -1384,6 +1454,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
ret = connector_status_connected;
else if (amdgpu_connector->dac_load_detect) { /* try load detection */
const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+
ret = encoder_funcs->detect(encoder, connector);
}
}
@@ -1409,10 +1480,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
connector->connector_type == DRM_MODE_CONNECTOR_eDP)
@@ -1424,7 +1493,7 @@ out:
}
static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
@@ -1462,7 +1531,7 @@ static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector
(amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
return amdgpu_atombios_dp_mode_valid_helper(connector, mode);
} else {
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
/* HDMI 1.3+ supports max clock of 340 Mhz */
if (mode->clock > 340000)
return MODE_CLOCK_HIGH;
@@ -1664,10 +1733,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
adev->mode_info.dither_property,
AMDGPU_FMT_DITHER_DISABLE);
- if (amdgpu_audio != 0)
+ if (amdgpu_audio != 0) {
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
subpixel_order = SubPixelHorizontalRGB;
connector->interlace_allowed = true;
@@ -1789,6 +1860,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
@@ -1842,6 +1914,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
@@ -1892,6 +1965,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
@@ -1956,7 +2030,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
if (i2c_bus->valid) {
connector->polled = DRM_CONNECTOR_POLL_CONNECT |
- DRM_CONNECTOR_POLL_DISCONNECT;
+ DRM_CONNECTOR_POLL_DISCONNECT;
}
} else
connector->polled = DRM_CONNECTOR_POLL_HPD;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
index 61fcef15ad72..eff833b6ed31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
@@ -24,7 +24,6 @@
#ifndef __AMDGPU_CONNECTORS_H__
#define __AMDGPU_CONNECTORS_H__
-struct edid *amdgpu_connector_edid(struct drm_connector *connector);
void amdgpu_connector_hotplug(struct drm_connector *connector);
int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector);
u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
new file mode 100644
index 000000000000..425a3e564360
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+
+static const guid_t MCE = CPER_NOTIFY_MCE;
+static const guid_t CMC = CPER_NOTIFY_CMC;
+static const guid_t BOOT = BOOT_TYPE;
+
+static const guid_t CRASHDUMP = AMD_CRASHDUMP;
+static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR;
+
+static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size)
+{
+ hdr->record_length += size;
+}
+
+static void amdgpu_cper_get_timestamp(struct cper_timestamp *timestamp)
+{
+ struct tm tm;
+ time64_t now = ktime_get_real_seconds();
+
+ time64_to_tm(now, 0, &tm);
+ timestamp->seconds = tm.tm_sec;
+ timestamp->minutes = tm.tm_min;
+ timestamp->hours = tm.tm_hour;
+ timestamp->flag = 0;
+ timestamp->day = tm.tm_mday;
+ timestamp->month = 1 + tm.tm_mon;
+ timestamp->year = (1900 + tm.tm_year) % 100;
+ timestamp->century = (1900 + tm.tm_year) / 100;
+}
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev)
+{
+ char record_id[16];
+
+ hdr->signature[0] = 'C';
+ hdr->signature[1] = 'P';
+ hdr->signature[2] = 'E';
+ hdr->signature[3] = 'R';
+ hdr->revision = CPER_HDR_REV_1;
+ hdr->signature_end = 0xFFFFFFFF;
+ hdr->error_severity = sev;
+
+ hdr->valid_bits.platform_id = 1;
+ hdr->valid_bits.timestamp = 1;
+
+ amdgpu_cper_get_timestamp(&hdr->timestamp);
+
+ snprintf(record_id, 9, "%d:%X",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0,
+ atomic_inc_return(&adev->cper.unique_id));
+ memcpy(hdr->record_id, record_id, 8);
+
+ snprintf(hdr->platform_id, 16, "0x%04X:0x%04X",
+ adev->pdev->vendor, adev->pdev->device);
+ /* pmfw version should be part of creator_id according to CPER spec */
+ snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID_AMDGPU);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_BOOT:
+ hdr->notify_type = BOOT;
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ hdr->notify_type = MCE;
+ break;
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ if (sev == CPER_SEV_NON_FATAL_CORRECTED)
+ hdr->notify_type = CMC;
+ else
+ hdr->notify_type = MCE;
+ break;
+ default:
+ dev_err(adev->dev, "Unknown CPER Type\n");
+ break;
+ }
+
+ __inc_entry_length(hdr, HDR_LEN);
+}
+
+static int amdgpu_cper_entry_fill_section_desc(struct amdgpu_device *adev,
+ struct cper_sec_desc *section_desc,
+ bool bp_threshold,
+ bool poison,
+ enum cper_error_severity sev,
+ guid_t sec_type,
+ uint32_t section_length,
+ uint32_t section_offset)
+{
+ section_desc->revision_minor = CPER_SEC_MINOR_REV_1;
+ section_desc->revision_major = CPER_SEC_MAJOR_REV_22;
+ section_desc->sec_offset = section_offset;
+ section_desc->sec_length = section_length;
+ section_desc->valid_bits.fru_text = 1;
+ section_desc->flag_bits.primary = 1;
+ section_desc->severity = sev;
+ section_desc->sec_type = sec_type;
+
+ snprintf(section_desc->fru_text, 20, "OAM%d",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0);
+
+ if (bp_threshold)
+ section_desc->flag_bits.exceed_err_threshold = 1;
+ if (poison)
+ section_desc->flag_bits.latent_err = 1;
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_crashdump_fatal *section;
+
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_crashdump_fatal *)((uint8_t *)hdr +
+ FATAL_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, false,
+ CPER_SEV_FATAL, CRASHDUMP, FATAL_SEC_LEN,
+ FATAL_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->body.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->body.reg_arr_size = sizeof(reg_data);
+ section->body.data = reg_data;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + FATAL_SEC_LEN);
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+ bool poison;
+
+ poison = sev != CPER_SEV_NON_FATAL_CORRECTED;
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, poison,
+ sev, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ reg_count = umin(reg_count, CPER_ACA_REG_COUNT);
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ memcpy(section->ctx.reg_dump, reg_dump, reg_count * sizeof(uint32_t));
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+ uint32_t socket_id;
+
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
+ CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.valid_bits.ms_chk = 1;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->info.ms_chk_bits.err_type = 1;
+ section->info.ms_chk_bits.pcc = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ /* Hardcoded Reg dump for bad page threshold CPER */
+ socket_id = (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_HI] = 0xB0000000;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff;
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = (socket_id / 4) & 0x01;
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x096 | (((socket_id % 4) & 0x3) << 12);
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count)
+{
+ struct cper_hdr *hdr;
+ uint32_t size = 0;
+
+ size += HDR_LEN;
+ size += (SEC_DESC_LEN * section_count);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ size += (NONSTD_SEC_LEN * section_count);
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ size += (FATAL_SEC_LEN * section_count);
+ break;
+ case AMDGPU_CPER_TYPE_BOOT:
+ size += (BOOT_SEC_LEN * section_count);
+ break;
+ default:
+ dev_err(adev->dev, "Unknown CPER Type!\n");
+ return NULL;
+ }
+
+ hdr = kzalloc(size, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ /* Save this early */
+ hdr->sec_cnt = section_count;
+
+ return hdr;
+}
+
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank)
+{
+ struct cper_hdr *fatal = NULL;
+ struct cper_sec_crashdump_reg_data reg_data = { 0 };
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ int ret;
+
+ fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
+ if (!fatal) {
+ dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
+ return -ENOMEM;
+ }
+
+ reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+ reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+ amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
+ ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
+ if (ret)
+ return ret;
+
+ amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
+ kfree(fatal);
+
+ return 0;
+}
+
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
+{
+ struct cper_hdr *bp_threshold = NULL;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ int ret;
+
+ bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
+ if (!bp_threshold) {
+ dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
+ return -ENOMEM;
+ }
+
+ amdgpu_cper_entry_fill_hdr(adev, bp_threshold,
+ AMDGPU_CPER_TYPE_BP_THRESHOLD,
+ CPER_SEV_FATAL);
+ ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
+ if (ret)
+ return ret;
+
+ amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
+ kfree(bp_threshold);
+
+ return 0;
+}
+
+static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
+ enum aca_error_type aca_err_type)
+{
+ switch (aca_err_type) {
+ case ACA_ERROR_TYPE_UE:
+ return CPER_SEV_FATAL;
+ case ACA_ERROR_TYPE_CE:
+ return CPER_SEV_NON_FATAL_CORRECTED;
+ case ACA_ERROR_TYPE_DEFERRED:
+ return CPER_SEV_NON_FATAL_UNCORRECTED;
+ default:
+ dev_err(adev->dev, "Unknown ACA error type!\n");
+ return CPER_SEV_FATAL;
+ }
+}
+
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count)
+{
+ struct cper_hdr *corrected = NULL;
+ enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ uint32_t i = 0;
+ int ret;
+
+ corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
+ if (!corrected) {
+ dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
+ return -ENOMEM;
+ }
+
+ /* Raise severity if any DE is detected in the ACA bank list */
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+ break;
+ }
+ }
+
+ amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
+
+ /* Combine CE and DE in cper record */
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+ reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+ reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+ reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+ reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+ reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+ reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+ reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+ ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
+ amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
+ reg_data, CPER_ACA_REG_COUNT);
+ if (ret)
+ return ret;
+ }
+
+ amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
+ kfree(corrected);
+
+ return 0;
+}
+
+static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos)
+{
+ struct cper_hdr *chdr;
+
+ chdr = (struct cper_hdr *)&(ring->ring[pos]);
+ return strcmp(chdr->signature, "CPER") ? false : true;
+}
+
+static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
+{
+ struct cper_hdr *chdr;
+ u64 p;
+ u32 chunk, rec_len = 0;
+
+ chdr = (struct cper_hdr *)&(ring->ring[pos]);
+ chunk = ring->ring_size - (pos << 2);
+
+ if (!strcmp(chdr->signature, "CPER")) {
+ rec_len = chdr->record_length;
+ goto calc;
+ }
+
+ /* ring buffer is not full, no cper data after ring->wptr */
+ if (ring->count_dw)
+ goto calc;
+
+ for (p = pos + 1; p <= ring->buf_mask; p++) {
+ chdr = (struct cper_hdr *)&(ring->ring[p]);
+ if (!strcmp(chdr->signature, "CPER")) {
+ rec_len = (p - pos) << 2;
+ goto calc;
+ }
+ }
+
+calc:
+ if (!rec_len)
+ return chunk;
+ else
+ return umin(rec_len, chunk);
+}
+
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
+{
+ u64 pos, wptr_old, rptr;
+ int rec_cnt_dw = count >> 2;
+ u32 chunk, ent_sz;
+ u8 *s = (u8 *)src;
+
+ if (count >= ring->ring_size - 4) {
+ dev_err(ring->adev->dev,
+ "CPER data size(%d) is larger than ring size(%d)\n",
+ count, ring->ring_size - 4);
+
+ return;
+ }
+
+ mutex_lock(&ring->adev->cper.ring_lock);
+
+ wptr_old = ring->wptr;
+ rptr = *ring->rptr_cpu_addr & ring->ptr_mask;
+
+ while (count) {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr);
+ chunk = umin(ent_sz, count);
+
+ memcpy(&ring->ring[ring->wptr], s, chunk);
+
+ ring->wptr += (chunk >> 2);
+ ring->wptr &= ring->ptr_mask;
+ count -= chunk;
+ s += chunk;
+ }
+
+ if (ring->count_dw < rec_cnt_dw)
+ ring->count_dw = 0;
+
+ /* the buffer is overflow, adjust rptr */
+ if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
+ ((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
+ ((rptr <= ring->wptr) && (ring->wptr < wptr_old))) {
+ pos = (ring->wptr + 1) & ring->ptr_mask;
+
+ do {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, pos);
+
+ rptr += (ent_sz >> 2);
+ rptr &= ring->ptr_mask;
+ *ring->rptr_cpu_addr = rptr;
+
+ pos = rptr;
+ } while (!amdgpu_cper_is_hdr(ring, rptr));
+ }
+
+ if (ring->count_dw >= rec_cnt_dw)
+ ring->count_dw -= rec_cnt_dw;
+ mutex_unlock(&ring->adev->cper.ring_lock);
+}
+
+static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *(ring->rptr_cpu_addr);
+}
+
+static u64 amdgpu_cper_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ return ring->wptr;
+}
+
+static const struct amdgpu_ring_funcs cper_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_CPER,
+ .align_mask = 0xff,
+ .support_64bit_ptrs = false,
+ .get_rptr = amdgpu_cper_ring_get_rptr,
+ .get_wptr = amdgpu_cper_ring_get_wptr,
+};
+
+static int amdgpu_cper_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &(adev->cper.ring_buf);
+
+ mutex_init(&adev->cper.ring_lock);
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = false;
+ ring->no_scheduler = true;
+ ring->funcs = &cper_ring_funcs;
+
+ sprintf(ring->name, "cper");
+ return amdgpu_ring_init(adev, ring, CPER_MAX_RING_SIZE, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+int amdgpu_cper_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
+ return 0;
+
+ r = amdgpu_cper_ring_init(adev);
+ if (r) {
+ dev_err(adev->dev, "failed to initialize cper ring, r = %d\n", r);
+ return r;
+ }
+
+ mutex_init(&adev->cper.cper_lock);
+
+ adev->cper.enabled = true;
+ adev->cper.max_count = CPER_MAX_ALLOWED_COUNT;
+
+ return 0;
+}
+
+int amdgpu_cper_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
+ return 0;
+
+ adev->cper.enabled = false;
+
+ amdgpu_ring_fini(&(adev->cper.ring_buf));
+ adev->cper.count = 0;
+ adev->cper.wptr = 0;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
new file mode 100644
index 000000000000..353421807387
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_CPER_H__
+#define __AMDGPU_CPER_H__
+
+#include "amd_cper.h"
+#include "amdgpu_aca.h"
+
+#define CPER_MAX_ALLOWED_COUNT 0x1000
+#define CPER_MAX_RING_SIZE 0X100000
+#define HDR_LEN (sizeof(struct cper_hdr))
+#define SEC_DESC_LEN (sizeof(struct cper_sec_desc))
+
+#define BOOT_SEC_LEN (sizeof(struct cper_sec_crashdump_boot))
+#define FATAL_SEC_LEN (sizeof(struct cper_sec_crashdump_fatal))
+#define NONSTD_SEC_LEN (sizeof(struct cper_sec_nonstd_err))
+
+#define SEC_DESC_OFFSET(idx) (HDR_LEN + (SEC_DESC_LEN * idx))
+
+#define BOOT_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (BOOT_SEC_LEN * idx))
+#define FATAL_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (FATAL_SEC_LEN * idx))
+#define NONSTD_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (NONSTD_SEC_LEN * idx))
+
+enum amdgpu_cper_type {
+ AMDGPU_CPER_TYPE_RUNTIME,
+ AMDGPU_CPER_TYPE_FATAL,
+ AMDGPU_CPER_TYPE_BOOT,
+ AMDGPU_CPER_TYPE_BP_THRESHOLD,
+};
+
+struct amdgpu_cper {
+ bool enabled;
+
+ atomic_t unique_id;
+ struct mutex cper_lock;
+
+ /* Lifetime CPERs generated */
+ uint32_t count;
+ uint32_t max_count;
+
+ uint32_t wptr;
+
+ void *ring[CPER_MAX_ALLOWED_COUNT];
+ struct amdgpu_ring ring_buf;
+ struct mutex ring_lock;
+};
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev);
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data);
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count);
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t section_idx);
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count);
+/* UE must be encoded into separated cper entries, 1 UE 1 cper */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank);
+/* CEs and DEs are combined into 1 cper entry */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count);
+/* Bad page threshold is encoded into separated cper entry */
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev);
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
+ void *src, int count);
+int amdgpu_cper_init(struct amdgpu_device *adev);
+int amdgpu_cper_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 3e240b952e79..ecdfe6cb36cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -32,58 +32,127 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
+ struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+ if (cs->in.num_chunks == 0)
+ return -EINVAL;
+
+ memset(p, 0, sizeof(*p));
+ p->adev = adev;
+ p->filp = filp;
+
+ p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+ if (!p->ctx)
+ return -EINVAL;
+
+ if (atomic_read(&p->ctx->guilty)) {
+ amdgpu_ctx_put(p->ctx);
+ return -ECANCELED;
+ }
+
+ amdgpu_sync_create(&p->sync);
+ drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ return 0;
+}
+
+static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib)
+{
+ struct drm_sched_entity *entity;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance,
+ chunk_ib->ring, &entity);
+ if (r)
+ return r;
+
+ /*
+ * Abort if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP.
+ */
+ if (entity->rq == NULL)
+ return -EINVAL;
+
+ /* Check if we can add this IB to some existing job */
+ for (i = 0; i < p->gang_size; ++i)
+ if (p->entities[i] == entity)
+ return i;
+
+ /* If not increase the gang size if possible */
+ if (i == AMDGPU_CS_GANG_SIZE)
+ return -EINVAL;
-static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_cs_chunk_fence *data,
- uint32_t *offset)
+ p->entities[i] = entity;
+ p->gang_size = i + 1;
+ return i;
+}
+
+static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib,
+ unsigned int *num_ibs)
+{
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
+ return -EINVAL;
+
+ ++(num_ibs[r]);
+ p->gang_leader_idx = r;
+ return 0;
+}
+
+static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_fence *data,
+ uint32_t *offset)
{
struct drm_gem_object *gobj;
- struct amdgpu_bo *bo;
unsigned long size;
- int r;
gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL)
return -EINVAL;
- bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
- p->uf_entry.priority = 0;
- p->uf_entry.tv.bo = &bo->tbo;
- /* One for TTM and one for the CS job */
- p->uf_entry.tv.num_shared = 2;
-
+ p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
drm_gem_object_put(gobj);
- size = amdgpu_bo_size(bo);
- if (size != PAGE_SIZE || (data->offset + 8) > size) {
- r = -EINVAL;
- goto error_unref;
- }
+ size = amdgpu_bo_size(p->uf_bo);
+ if (size != PAGE_SIZE || data->offset > (size - 8))
+ return -EINVAL;
- if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- r = -EINVAL;
- goto error_unref;
- }
+ if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
+ return -EINVAL;
*offset = data->offset;
-
return 0;
-
-error_unref:
- amdgpu_bo_unref(&bo);
- return r;
}
-static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_bo_list_in *data)
+static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
{
+ struct drm_amdgpu_bo_list_entry *info;
int r;
- struct drm_amdgpu_bo_list_entry *info = NULL;
r = amdgpu_bo_create_list_entry_array(data, &info);
if (r)
@@ -103,48 +172,27 @@ error_free:
return r;
}
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
+/* Copy the data from userspace and go over it the first time */
+static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
struct amdgpu_vm *vm = &fpriv->vm;
- uint64_t *chunk_array_user;
uint64_t *chunk_array;
- unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
- int i;
+ size_t size;
int ret;
+ int i;
- if (cs->in.num_chunks == 0)
- return 0;
-
- chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
- if (!chunk_array)
- return -ENOMEM;
-
- p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
- if (!p->ctx) {
- ret = -EINVAL;
- goto free_chunk;
- }
-
- mutex_lock(&p->ctx->lock);
-
- /* skip guilty context job */
- if (atomic_read(&p->ctx->guilty) == 1) {
- ret = -ECANCELED;
- goto free_chunk;
- }
-
- /* get chunks */
- chunk_array_user = u64_to_user_ptr(cs->in.chunks);
- if (copy_from_user(chunk_array, chunk_array_user,
- sizeof(uint64_t)*cs->in.num_chunks)) {
- ret = -EFAULT;
- goto free_chunk;
- }
+ chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks),
+ cs->in.num_chunks,
+ sizeof(uint64_t));
+ if (IS_ERR(chunk_array))
+ return PTR_ERR(chunk_array);
p->nchunks = cs->in.num_chunks;
- p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
+ p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
GFP_KERNEL);
if (!p->chunks) {
ret = -ENOMEM;
@@ -152,9 +200,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
}
for (i = 0; i < p->nchunks; i++) {
- struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
struct drm_amdgpu_cs_chunk user_chunk;
- uint32_t __user *cdata;
chunk_ptr = u64_to_user_ptr(chunk_array[i]);
if (copy_from_user(&user_chunk, chunk_ptr,
@@ -167,50 +214,50 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
p->chunks[i].length_dw = user_chunk.length_dw;
size = p->chunks[i].length_dw;
- cdata = u64_to_user_ptr(user_chunk.chunk_data);
- p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
- if (p->chunks[i].kdata == NULL) {
- ret = -ENOMEM;
+ p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data),
+ size,
+ sizeof(uint32_t));
+ if (IS_ERR(p->chunks[i].kdata)) {
+ ret = PTR_ERR(p->chunks[i].kdata);
i--;
goto free_partial_kdata;
}
size *= sizeof(uint32_t);
- if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
- ret = -EFAULT;
- goto free_partial_kdata;
- }
+ /* Assume the worst on the following checks */
+ ret = -EINVAL;
switch (p->chunks[i].chunk_id) {
case AMDGPU_CHUNK_ID_IB:
- ++num_ibs;
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
+ if (ret)
+ goto free_partial_kdata;
break;
case AMDGPU_CHUNK_ID_FENCE:
- size = sizeof(struct drm_amdgpu_cs_chunk_fence);
- if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
- ret = -EINVAL;
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
goto free_partial_kdata;
- }
- ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
- &uf_offset);
+ ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
+ &uf_offset);
if (ret)
goto free_partial_kdata;
-
break;
case AMDGPU_CHUNK_ID_BO_HANDLES:
- size = sizeof(struct drm_amdgpu_bo_list_in);
- if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
- ret = -EINVAL;
+ if (size < sizeof(struct drm_amdgpu_bo_list_in))
goto free_partial_kdata;
- }
- ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
- if (ret)
+ /* Only a single BO list is allowed to simplify handling. */
+ if (p->bo_list)
goto free_partial_kdata;
+ ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
@@ -219,26 +266,55 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;
default:
- ret = -EINVAL;
goto free_partial_kdata;
}
}
- ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
- if (ret)
+ if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) {
+ ret = -EINVAL;
goto free_all_kdata;
+ }
+
+ for (i = 0; i < p->gang_size; ++i) {
+ ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
+ num_ibs[i], &p->jobs[i],
+ p->filp->client_id);
+ if (ret)
+ goto free_all_kdata;
+ switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
+ case AMDGPU_ENFORCE_ISOLATION_DISABLE:
+ default:
+ p->jobs[i]->enforce_isolation = false;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = true;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ }
+ }
+ p->gang_leader = p->jobs[p->gang_leader_idx];
- if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+ if (p->ctx->generation != p->gang_leader->generation) {
ret = -ECANCELED;
goto free_all_kdata;
}
- if (p->uf_entry.tv.bo)
- p->job->uf_addr = uf_offset;
- kfree(chunk_array);
+ if (p->uf_bo)
+ p->gang_leader->uf_addr = uf_offset;
+ kvfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
amdgpu_vm_set_task_info(vm);
@@ -250,15 +326,340 @@ free_all_kdata:
free_partial_kdata:
for (; i >= 0; i--)
kvfree(p->chunks[i].kdata);
- kfree(p->chunks);
+ kvfree(p->chunks);
p->chunks = NULL;
p->nchunks = 0;
free_chunk:
- kfree(chunk_array);
+ kvfree(chunk_array);
return ret;
}
+static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk,
+ unsigned int *ce_preempt,
+ unsigned int *de_preempt)
+{
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_ring *ring;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ job = p->jobs[r];
+ ring = amdgpu_job_ring(job);
+ ib = &job->ibs[job->num_ibs++];
+
+ /* submissions to kernel queues are disabled */
+ if (ring->no_user_submission)
+ return -EINVAL;
+
+ /* MM engine doesn't support user fences */
+ if (p->uf_bo && ring->funcs->no_user_fence)
+ return -EINVAL;
+
+ if (!p->adev->debug_enable_ce_cs &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
+ dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
+ return -EINVAL;
+ }
+
+ if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
+ (*ce_preempt)++;
+ else
+ (*de_preempt)++;
+
+ /* Each GFX command submit allows only 1 IB max
+ * preemptible for CE & DE */
+ if (*ce_preempt > 1 || *de_preempt > 1)
+ return -EINVAL;
+ }
+
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+
+ r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
+ chunk_ib->ib_bytes : 0,
+ AMDGPU_IB_POOL_DELAYED, ib);
+ if (r) {
+ drm_err(adev_to_drm(p->adev), "Failed to get ib !\n");
+ return r;
+ }
+
+ ib->gpu_addr = chunk_ib->va_start;
+ ib->length_dw = chunk_ib->ib_bytes / 4;
+ ib->flags = chunk_ib->flags;
+ return 0;
+}
+
+static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_dep);
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_ctx *ctx;
+ struct drm_sched_entity *entity;
+ struct dma_fence *fence;
+
+ ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+ deps[i].ip_instance,
+ deps[i].ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return r;
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+ amdgpu_ctx_put(ctx);
+
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ else if (!fence)
+ continue;
+
+ if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+ struct drm_sched_fence *s_fence;
+ struct dma_fence *old = fence;
+
+ s_fence = to_drm_sched_fence(fence);
+ fence = dma_fence_get(&s_fence->scheduled);
+ dma_fence_put(old);
+ }
+
+ r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
+ uint32_t handle, u64 point,
+ u64 flags)
+{
+ struct dma_fence *fence;
+ int r;
+
+ r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+ if (r) {
+ drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n",
+ handle, point, r);
+ return r;
+ }
+
+ r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
+ dma_fence_put(fence);
+ return r;
+}
+
+static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
+ unsigned int num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
+ unsigned int num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
+ syncobj_deps[i].point,
+ syncobj_deps[i].flags);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
+ unsigned int num_deps;
+ int i;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+
+ for (i = 0; i < num_deps; ++i) {
+ p->post_deps[i].syncobj =
+ drm_syncobj_find(p->filp, deps[i].handle);
+ if (!p->post_deps[i].syncobj)
+ return -EINVAL;
+ p->post_deps[i].chain = NULL;
+ p->post_deps[i].point = 0;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
+ unsigned int num_deps;
+ int i;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+ dep->chain = NULL;
+ if (syncobj_deps[i].point) {
+ dep->chain = dma_fence_chain_alloc();
+ if (!dep->chain)
+ return -ENOMEM;
+ }
+
+ dep->syncobj = drm_syncobj_find(p->filp,
+ syncobj_deps[i].handle);
+ if (!dep->syncobj) {
+ dma_fence_chain_free(dep->chain);
+ return -EINVAL;
+ }
+ dep->point = syncobj_deps[i].point;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ int i;
+
+ if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+ return -EINVAL;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;
+ p->jobs[i]->init_shadow =
+ shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
+{
+ unsigned int ce_preempt = 0, de_preempt = 0;
+ int i, r;
+
+ for (i = 0; i < p->nchunks; ++i) {
+ struct amdgpu_cs_chunk *chunk;
+
+ chunk = &p->chunks[i];
+
+ switch (chunk->chunk_id) {
+ case AMDGPU_CHUNK_ID_IB:
+ r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+ r = amdgpu_cs_p2_dependencies(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ r = amdgpu_cs_p2_syncobj_in(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ r = amdgpu_cs_p2_syncobj_out(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ r = amdgpu_cs_p2_shadow(p, chunk);
+ if (r)
+ return r;
+ break;
+ }
+ }
+
+ return 0;
+}
+
/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
@@ -298,7 +699,6 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
{
s64 time_us, increment_us;
u64 free_vram, total_vram, used_vram;
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
* throttling.
*
@@ -308,14 +708,14 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*/
const s64 us_upper_bound = 200000;
- if (!adev->mm_stats.log2_max_MBps) {
+ if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) {
*max_bytes = 0;
*max_vis_bytes = 0;
return;
}
total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
- used_vram = amdgpu_vram_mgr_usage(vram_man);
+ used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
spin_lock(&adev->mm_stats.lock);
@@ -342,7 +742,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
s64 min_us;
- /* Be more aggresive on dGPUs. Try to fill a portion of free
+ /* Be more aggressive on dGPUs. Try to fill a portion of free
* VRAM now.
*/
if (!(adev->flags & AMD_IS_APU))
@@ -362,10 +762,11 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
- amdgpu_vram_mgr_vis_usage(vram_man);
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
if (used_vis_vram < total_vis_vram) {
u64 free_vis_vram = total_vis_vram - used_vis_vram;
+
adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
increment_us, us_upper_bound);
@@ -396,10 +797,10 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
spin_unlock(&adev->mm_stats.lock);
}
-static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
- struct amdgpu_bo *bo)
+static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_cs_parser *p = param;
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
@@ -440,7 +841,7 @@ retry:
p->bytes_moved += ctx.bytes_moved;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -451,72 +852,18 @@ retry:
return r;
}
-static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
-{
- struct amdgpu_cs_parser *p = param;
- int r;
-
- r = amdgpu_cs_bo_validate(p, bo);
- if (r)
- return r;
-
- if (bo->shadow)
- r = amdgpu_cs_bo_validate(p, bo->shadow);
-
- return r;
-}
-
-static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
- struct list_head *validated)
-{
- struct ttm_operation_ctx ctx = { true, false };
- struct amdgpu_bo_list_entry *lobj;
- int r;
-
- list_for_each_entry(lobj, validated, tv.head) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
- struct mm_struct *usermm;
-
- usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
- if (usermm && usermm != current->mm)
- return -EPERM;
-
- if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
- lobj->user_invalidated && lobj->user_pages) {
- amdgpu_bo_placement_from_domain(bo,
- AMDGPU_GEM_DOMAIN_CPU);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- return r;
-
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
- lobj->user_pages);
- }
-
- r = amdgpu_cs_validate(p, bo);
- if (r)
- return r;
-
- kvfree(lobj->user_pages);
- lobj->user_pages = NULL;
- }
- return 0;
-}
-
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
- struct list_head duplicates;
- struct amdgpu_bo *gds;
- struct amdgpu_bo *gws;
- struct amdgpu_bo *oa;
+ struct drm_gem_object *obj;
+ unsigned long index;
+ unsigned int i;
int r;
- INIT_LIST_HEAD(&p->validated);
-
/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
if (cs->in.bo_list_handle) {
if (p->bo_list)
@@ -534,44 +881,27 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
return r;
}
- /* One for TTM and one for the CS job */
- amdgpu_bo_list_for_each_entry(e, p->bo_list)
- e->tv.num_shared = 2;
-
- amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-
- INIT_LIST_HEAD(&duplicates);
- amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
-
- if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
- list_add(&p->uf_entry.tv.head, &p->validated);
+ mutex_lock(&p->bo_list->bo_list_mutex);
/* Get userptr backing pages. If pages are updated after registered
* in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
* amdgpu_ttm_backend_bind() to flush and invalidate new pages
*/
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
bool userpage_invalidated = false;
- int i;
+ struct amdgpu_bo *bo = e->bo;
- e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!e->user_pages) {
- DRM_ERROR("calloc failure\n");
+ e->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!e->range))
return -ENOMEM;
- }
- r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
- if (r) {
- kvfree(e->user_pages);
- e->user_pages = NULL;
- return r;
- }
+ r = amdgpu_ttm_tt_get_user_pages(bo, e->range);
+ if (r)
+ goto out_free_user_pages;
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
- if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+ if (bo->tbo.ttm->pages[i] !=
+ hmm_pfn_to_page(e->range->hmm_range.hmm_pfns[i])) {
userpage_invalidated = true;
break;
}
@@ -579,12 +909,53 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->user_invalidated = userpage_invalidated;
}
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
- &duplicates);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
- goto out;
+ drm_exec_until_all_locked(&p->exec) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ /* One fence for TTM and one for each CS job */
+ r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
+ 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
+ }
+
+ if (p->uf_bo) {
+ r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
+ 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
+ }
+
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct mm_struct *usermm;
+
+ usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
+ if (usermm && usermm != current->mm) {
+ r = -EPERM;
+ goto out_free_user_pages;
+ }
+
+ if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
+ e->user_invalidated) {
+ amdgpu_bo_placement_from_domain(e->bo,
+ AMDGPU_GEM_DOMAIN_CPU);
+ r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
+ &ctx);
+ if (r)
+ goto out_free_user_pages;
+
+ amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
+ e->range);
+ }
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -592,205 +963,161 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
p->bytes_moved = 0;
p->bytes_moved_vis = 0;
- r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
- amdgpu_cs_validate, p);
+ r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
+ amdgpu_cs_bo_validate, p);
if (r) {
- DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
- goto error_validate;
+ drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n");
+ goto out_free_user_pages;
}
- r = amdgpu_cs_list_validate(p, &duplicates);
- if (r)
- goto error_validate;
+ drm_exec_for_each_locked_object(&p->exec, index, obj) {
+ r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
- r = amdgpu_cs_list_validate(p, &p->validated);
- if (r)
- goto error_validate;
+ if (p->uf_bo) {
+ r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
+ }
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
- gds = p->bo_list->gds_obj;
- gws = p->bo_list->gws_obj;
- oa = p->bo_list->oa_obj;
-
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
+ p->bo_list->gws_obj,
+ p->bo_list->oa_obj);
+ return 0;
- /* Make sure we use the exclusive slot for shared BOs */
- if (bo->prime_shared_count)
- e->tv.num_shared = 0;
- e->bo_va = amdgpu_vm_bo_find(vm, bo);
+out_free_user_pages:
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ amdgpu_hmm_range_free(e->range);
+ e->range = NULL;
}
+ mutex_unlock(&p->bo_list->bo_list_mutex);
+ return r;
+}
- if (gds) {
- p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
- p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
- }
- if (gws) {
- p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
- p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
- }
- if (oa) {
- p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
- p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
- }
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
+{
+ int i, j;
- if (!r && p->uf_entry.tv.bo) {
- struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
+ if (!trace_amdgpu_cs_enabled())
+ return;
- r = amdgpu_ttm_alloc_gart(&uf->tbo);
- p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
- }
+ for (i = 0; i < p->gang_size; ++i) {
+ struct amdgpu_job *job = p->jobs[i];
-error_validate:
- if (r)
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-out:
- return r;
+ for (j = 0; j < job->num_ibs; ++j)
+ trace_amdgpu_cs(p, job, &job->ibs[j]);
+ }
}
-static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
{
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_bo_list_entry *e;
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ unsigned int i;
int r;
- list_for_each_entry(e, &p->validated, tv.head) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- struct dma_resv *resv = bo->tbo.base.resv;
- enum amdgpu_sync_mode sync_mode;
+ /* Only for UVD/VCE VM emulation */
+ if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
+ return 0;
- sync_mode = amdgpu_bo_explicit_sync(bo) ?
- AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
- r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
- &fpriv->vm);
- if (r)
+ for (i = 0; i < job->num_ibs; ++i) {
+ struct amdgpu_ib *ib = &job->ibs[i];
+ struct amdgpu_bo_va_mapping *m;
+ struct amdgpu_bo *aobj;
+ uint64_t va_start;
+ uint8_t *kptr;
+
+ va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+ if (r) {
+ drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n");
return r;
- }
- return 0;
-}
+ }
-/**
- * cs_parser_fini() - clean parser states
- * @parser: parser structure holding parsing context.
- * @error: error number
- * @backoff: indicator to backoff the reservation
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
- bool backoff)
-{
- unsigned i;
+ if ((va_start + ib->length_dw * 4) >
+ (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n");
+ return -EINVAL;
+ }
- if (error && backoff)
- ttm_eu_backoff_reservation(&parser->ticket,
- &parser->validated);
+ /* the IB should be reserved at this point */
+ r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+ if (r)
+ return r;
- for (i = 0; i < parser->num_post_deps; i++) {
- drm_syncobj_put(parser->post_deps[i].syncobj);
- kfree(parser->post_deps[i].chain);
- }
- kfree(parser->post_deps);
+ kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
- dma_fence_put(parser->fence);
+ if (ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, ib->length_dw * 4);
+ amdgpu_bo_kunmap(aobj);
- if (parser->ctx) {
- mutex_unlock(&parser->ctx->lock);
- amdgpu_ctx_put(parser->ctx);
+ r = amdgpu_ring_parse_cs(ring, p, job, ib);
+ if (r)
+ return r;
+
+ if (ib->sa_bo)
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
+ }
}
- if (parser->bo_list)
- amdgpu_bo_list_put(parser->bo_list);
- for (i = 0; i < parser->nchunks; i++)
- kvfree(parser->chunks[i].kdata);
- kfree(parser->chunks);
- if (parser->job)
- amdgpu_job_free(parser->job);
- if (parser->uf_entry.tv.bo) {
- struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+ return 0;
+}
+
+static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
+{
+ unsigned int i;
+ int r;
- amdgpu_bo_unref(&uf);
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
+ if (r)
+ return r;
}
+ return 0;
}
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *job = p->gang_leader;
struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct amdgpu_bo_va *bo_va;
- struct amdgpu_bo *bo;
+ unsigned int i;
int r;
- /* Only for UVD/VCE VM emulation */
- if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
- unsigned i, j;
-
- for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_bo_va_mapping *m;
- struct amdgpu_bo *aobj = NULL;
- struct amdgpu_cs_chunk *chunk;
- uint64_t offset, va_start;
- struct amdgpu_ib *ib;
- uint8_t *kptr;
-
- chunk = &p->chunks[i];
- ib = &p->job->ibs[j];
- chunk_ib = chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
- r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
- if (r) {
- DRM_ERROR("IB va_start is invalid\n");
- return r;
- }
+ /*
+ * We can't use gang submit on with reserved VMIDs when the VM changes
+ * can't be invalidated by more than one engine at the same time.
+ */
+ if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
+ for (i = 0; i < p->gang_size; ++i) {
+ struct drm_sched_entity *entity = p->entities[i];
+ struct drm_gpu_scheduler *sched = entity->rq->sched;
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched);
- if ((va_start + chunk_ib->ib_bytes) >
- (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+ if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
return -EINVAL;
- }
-
- /* the IB should be reserved at this point */
- r = amdgpu_bo_kmap(aobj, (void **)&kptr);
- if (r) {
- return r;
- }
-
- offset = m->start * AMDGPU_GPU_PAGE_SIZE;
- kptr += va_start - offset;
-
- if (ring->funcs->parse_cs) {
- memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
- amdgpu_bo_kunmap(aobj);
-
- r = amdgpu_ring_parse_cs(ring, p, j);
- if (r)
- return r;
- } else {
- ib->ptr = (uint32_t *)kptr;
- r = amdgpu_ring_patch_cs_in_place(ring, p, j);
- amdgpu_bo_kunmap(aobj);
- if (r)
- return r;
- }
-
- j++;
}
}
- if (!p->job->vm)
- return amdgpu_cs_sync_rings(p);
-
+ if (!amdgpu_vm_ready(vm))
+ return -EINVAL;
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
@@ -800,28 +1127,30 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+ if (fpriv->csa_va) {
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
}
+ /* FIXME: In theory this loop shouldn't be needed any more when
+ * amdgpu_vm_handle_moved handles all moved BOs that are reserved
+ * with p->ticket. But removing it caused test regressions, so I'm
+ * leaving it here for now.
+ */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- /* ignore duplicates */
- bo = ttm_to_amdgpu_bo(e->tv.bo);
- if (!bo)
- continue;
-
bo_va = e->bo_va;
if (bo_va == NULL)
continue;
@@ -830,12 +1159,13 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
}
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
if (r)
return r;
@@ -843,345 +1173,93 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+ r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
if (r)
return r;
- p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+ for (i = 0; i < p->gang_size; ++i) {
+ job = p->jobs[i];
- if (amdgpu_vm_debug) {
+ if (!job->vm)
+ continue;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ }
+
+ if (adev->debug_vm) {
/* Invalidate all BOs to test for userspace bugs */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ struct amdgpu_bo *bo = e->bo;
/* ignore duplicates */
if (!bo)
continue;
- amdgpu_vm_bo_invalidate(adev, bo, false);
- }
- }
-
- return amdgpu_cs_sync_rings(p);
-}
-
-static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *parser)
-{
- struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- int r, ce_preempt = 0, de_preempt = 0;
- struct amdgpu_ring *ring;
- int i, j;
-
- for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
- struct amdgpu_cs_chunk *chunk;
- struct amdgpu_ib *ib;
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct drm_sched_entity *entity;
-
- chunk = &parser->chunks[i];
- ib = &parser->job->ibs[j];
- chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
- (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
- ce_preempt++;
- else
- de_preempt++;
- }
-
- /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
- if (ce_preempt > 1 || de_preempt > 1)
- return -EINVAL;
- }
-
- r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring,
- &entity);
- if (r)
- return r;
-
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
- parser->job->preamble_status |=
- AMDGPU_PREAMBLE_IB_PRESENT;
-
- if (parser->entity && parser->entity != entity)
- return -EINVAL;
-
- /* Return if there is no run queue associated with this entity.
- * Possibly because of disabled HW IP*/
- if (entity->rq == NULL)
- return -EINVAL;
-
- parser->entity = entity;
-
- ring = to_amdgpu_ring(entity->rq->sched);
- r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
- chunk_ib->ib_bytes : 0,
- AMDGPU_IB_POOL_DELAYED, ib);
- if (r) {
- DRM_ERROR("Failed to get ib !\n");
- return r;
+ amdgpu_vm_bo_invalidate(bo, false);
}
-
- ib->gpu_addr = chunk_ib->va_start;
- ib->length_dw = chunk_ib->ib_bytes / 4;
- ib->flags = chunk_ib->flags;
-
- j++;
}
- /* MM engine doesn't support user fences */
- ring = to_amdgpu_ring(parser->entity->rq->sched);
- if (parser->job->uf_addr && ring->funcs->no_user_fence)
- return -EINVAL;
-
- return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
-}
-
-static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- unsigned num_deps;
- int i, r;
- struct drm_amdgpu_cs_chunk_dep *deps;
-
- deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_dep);
-
- for (i = 0; i < num_deps; ++i) {
- struct amdgpu_ctx *ctx;
- struct drm_sched_entity *entity;
- struct dma_fence *fence;
-
- ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
- if (ctx == NULL)
- return -EINVAL;
-
- r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
- deps[i].ip_instance,
- deps[i].ring, &entity);
- if (r) {
- amdgpu_ctx_put(ctx);
- return r;
- }
-
- fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
- amdgpu_ctx_put(ctx);
-
- if (IS_ERR(fence))
- return PTR_ERR(fence);
- else if (!fence)
- continue;
-
- if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
- struct drm_sched_fence *s_fence;
- struct dma_fence *old = fence;
-
- s_fence = to_drm_sched_fence(fence);
- fence = dma_fence_get(&s_fence->scheduled);
- dma_fence_put(old);
- }
-
- r = amdgpu_sync_fence(&p->job->sync, fence);
- dma_fence_put(fence);
- if (r)
- return r;
- }
return 0;
}
-static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
- uint32_t handle, u64 point,
- u64 flags)
+static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct drm_gpu_scheduler *sched;
+ struct drm_gem_object *obj;
struct dma_fence *fence;
+ unsigned long index;
+ unsigned int i;
int r;
- r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
if (r) {
- DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
- handle, point, r);
+ if (r != -ERESTARTSYS)
+ drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n");
return r;
}
- r = amdgpu_sync_fence(&p->job->sync, fence);
- dma_fence_put(fence);
-
- return r;
-}
+ drm_exec_for_each_locked_object(&p->exec, index, obj) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_sem *deps;
- unsigned num_deps;
- int i, r;
+ struct dma_resv *resv = bo->tbo.base.resv;
+ enum amdgpu_sync_mode sync_mode;
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
- 0, 0);
+ sync_mode = amdgpu_bo_explicit_sync(bo) ?
+ AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+ r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
+ &fpriv->vm);
if (r)
return r;
}
- return 0;
-}
-
-
-static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
- unsigned num_deps;
- int i, r;
-
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_syncobj);
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p,
- syncobj_deps[i].handle,
- syncobj_deps[i].point,
- syncobj_deps[i].flags);
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
if (r)
return r;
}
- return 0;
-}
-
-static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_sem *deps;
- unsigned num_deps;
- int i;
-
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
-
- if (p->post_deps)
- return -EINVAL;
-
- p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
- GFP_KERNEL);
- p->num_post_deps = 0;
-
- if (!p->post_deps)
- return -ENOMEM;
-
-
- for (i = 0; i < num_deps; ++i) {
- p->post_deps[i].syncobj =
- drm_syncobj_find(p->filp, deps[i].handle);
- if (!p->post_deps[i].syncobj)
- return -EINVAL;
- p->post_deps[i].chain = NULL;
- p->post_deps[i].point = 0;
- p->num_post_deps++;
- }
-
- return 0;
-}
-
-
-static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
- unsigned num_deps;
- int i;
+ sched = p->gang_leader->base.entity->rq->sched;
+ while ((fence = amdgpu_sync_get_fence(&p->sync))) {
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_syncobj);
-
- if (p->post_deps)
- return -EINVAL;
-
- p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
- GFP_KERNEL);
- p->num_post_deps = 0;
-
- if (!p->post_deps)
- return -ENOMEM;
-
- for (i = 0; i < num_deps; ++i) {
- struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
-
- dep->chain = NULL;
- if (syncobj_deps[i].point) {
- dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
- if (!dep->chain)
- return -ENOMEM;
- }
-
- dep->syncobj = drm_syncobj_find(p->filp,
- syncobj_deps[i].handle);
- if (!dep->syncobj) {
- kfree(dep->chain);
- return -EINVAL;
+ /*
+ * When we have an dependency it might be necessary to insert a
+ * pipeline sync to make sure that all caches etc are flushed and the
+ * next job actually sees the results from the previous one
+ * before we start executing on the same scheduler ring.
+ */
+ if (!s_fence || s_fence->sched != sched) {
+ dma_fence_put(fence);
+ continue;
}
- dep->point = syncobj_deps[i].point;
- p->num_post_deps++;
- }
-
- return 0;
-}
-
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
-{
- int i, r;
-
- for (i = 0; i < p->nchunks; ++i) {
- struct amdgpu_cs_chunk *chunk;
-
- chunk = &p->chunks[i];
- switch (chunk->chunk_id) {
- case AMDGPU_CHUNK_ID_DEPENDENCIES:
- case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
- r = amdgpu_cs_process_fence_dep(p, chunk);
- if (r)
- return r;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
- r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
- if (r)
- return r;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
- r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
- if (r)
- return r;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
- r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
- if (r)
- return r;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
- r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
- if (r)
- return r;
- break;
- }
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
+ GFP_KERNEL);
+ dma_fence_put(fence);
+ if (r)
+ return r;
}
-
return 0;
}
@@ -1206,18 +1284,36 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct drm_sched_entity *entity = p->entity;
+ struct amdgpu_job *leader = p->gang_leader;
struct amdgpu_bo_list_entry *e;
- struct amdgpu_job *job;
+ struct drm_gem_object *gobj;
+ unsigned long index;
+ unsigned int i;
uint64_t seq;
int r;
- job = p->job;
- p->job = NULL;
+ for (i = 0; i < p->gang_size; ++i)
+ drm_sched_job_arm(&p->jobs[i]->base);
- r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
- if (r)
- goto error_unlock;
+ for (i = 0; i < p->gang_size; ++i) {
+ struct dma_fence *fence;
+
+ if (p->jobs[i] == leader)
+ continue;
+
+ fence = &p->jobs[i]->base.s_fence->scheduled;
+ dma_fence_get(fence);
+ r = drm_sched_job_add_dependency(&leader->base, fence);
+ if (r) {
+ dma_fence_put(fence);
+ return r;
+ }
+ }
+
+ if (p->gang_size > 1) {
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_gang_leader(p->jobs[i], leader);
+ }
/* No memory allocation is allowed while holding the notifier lock.
* The lock is held until amdgpu_cs_submit is finished and fence is
@@ -1228,69 +1324,100 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
*/
+ r = 0;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ r |= !amdgpu_hmm_range_valid(e->range);
+ amdgpu_hmm_range_free(e->range);
+ e->range = NULL;
}
if (r) {
r = -EAGAIN;
- goto error_abort;
+ mutex_unlock(&p->adev->notifier_lock);
+ return r;
}
- p->fence = dma_fence_get(&job->base.s_fence->finished);
+ p->fence = dma_fence_get(&leader->base.s_fence->finished);
+ drm_exec_for_each_locked_object(&p->exec, index, gobj) {
+
+ ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);
+
+ /* Everybody except for the gang leader uses READ */
+ for (i = 0; i < p->gang_size; ++i) {
+ if (p->jobs[i] == leader)
+ continue;
+
+ dma_resv_add_fence(gobj->resv,
+ &p->jobs[i]->base.s_fence->finished,
+ DMA_RESV_USAGE_READ);
+ }
+
+ /* The gang leader as remembered as writer */
+ dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
+ }
- amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
+ seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
+ p->fence);
amdgpu_cs_post_dependencies(p);
- if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
!p->ctx->preamble_presented) {
- job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
p->ctx->preamble_presented = true;
}
cs->out.handle = seq;
- job->uf_sequence = seq;
-
- amdgpu_job_free_resources(job);
-
- trace_amdgpu_cs_ioctl(job);
- amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
- drm_sched_entity_push_job(&job->base, entity);
+ leader->uf_sequence = seq;
+
+ amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
+ for (i = 0; i < p->gang_size; ++i) {
+ amdgpu_job_free_resources(p->jobs[i]);
+ trace_amdgpu_cs_ioctl(p->jobs[i]);
+ drm_sched_entity_push_job(&p->jobs[i]->base);
+ p->jobs[i] = NULL;
+ }
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
- ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
-
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return 0;
-
-error_abort:
- drm_sched_job_cleanup(&job->base);
- mutex_unlock(&p->adev->notifier_lock);
-
-error_unlock:
- amdgpu_job_free(job);
- return r;
}
-static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+/* Cleanup the parser structure */
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
- int i;
+ unsigned int i;
- if (!trace_amdgpu_cs_enabled())
- return;
+ amdgpu_sync_free(&parser->sync);
+ drm_exec_fini(&parser->exec);
- for (i = 0; i < parser->job->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
+ for (i = 0; i < parser->num_post_deps; i++) {
+ drm_syncobj_put(parser->post_deps[i].syncobj);
+ kfree(parser->post_deps[i].chain);
+ }
+ kfree(parser->post_deps);
+
+ dma_fence_put(parser->fence);
+
+ if (parser->ctx)
+ amdgpu_ctx_put(parser->ctx);
+ if (parser->bo_list)
+ amdgpu_bo_list_put(parser->bo_list);
+
+ for (i = 0; i < parser->nchunks; i++)
+ kvfree(parser->chunks[i].kdata);
+ kvfree(parser->chunks);
+ for (i = 0; i < parser->gang_size; ++i) {
+ if (parser->jobs[i])
+ amdgpu_job_free(parser->jobs[i]);
+ }
+ amdgpu_bo_unref(&parser->uf_bo);
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = drm_to_adev(dev);
- union drm_amdgpu_cs *cs = data;
- struct amdgpu_cs_parser parser = {};
- bool reserved_buffers = false;
+ struct amdgpu_cs_parser parser;
int r;
if (amdgpu_ras_intr_triggered())
@@ -1299,48 +1426,55 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!adev->accel_working)
return -EBUSY;
- parser.adev = adev;
- parser.filp = filp;
-
- r = amdgpu_cs_parser_init(&parser, data);
+ r = amdgpu_cs_parser_init(&parser, adev, filp, data);
if (r) {
- if (printk_ratelimit())
- DRM_ERROR("Failed to initialize parser %d!\n", r);
- goto out;
+ drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r);
+ return r;
}
- r = amdgpu_cs_ib_fill(adev, &parser);
+ r = amdgpu_cs_pass1(&parser, data);
if (r)
- goto out;
+ goto error_fini;
- r = amdgpu_cs_dependencies(adev, &parser);
- if (r) {
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
- goto out;
- }
+ r = amdgpu_cs_pass2(&parser);
+ if (r)
+ goto error_fini;
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
if (r == -ENOMEM)
- DRM_ERROR("Not enough memory for command submission!\n");
+ drm_err(dev, "Not enough memory for command submission!\n");
else if (r != -ERESTARTSYS && r != -EAGAIN)
- DRM_ERROR("Failed to process the buffer list %d!\n", r);
- goto out;
+ drm_dbg(dev, "Failed to process the buffer list %d!\n", r);
+ goto error_fini;
}
- reserved_buffers = true;
+ r = amdgpu_cs_patch_jobs(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_vm_handling(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_sync_rings(&parser);
+ if (r)
+ goto error_backoff;
trace_amdgpu_cs_ibs(&parser);
- r = amdgpu_cs_vm_handling(&parser);
+ r = amdgpu_cs_submit(&parser, data);
if (r)
- goto out;
+ goto error_backoff;
- r = amdgpu_cs_submit(&parser, cs);
+ amdgpu_cs_parser_fini(&parser);
+ return 0;
-out:
- amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+error_backoff:
+ mutex_unlock(&parser.bo_list->bo_list_mutex);
+error_fini:
+ amdgpu_cs_parser_fini(&parser);
return r;
}
@@ -1483,12 +1617,13 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
return 0;
default:
+ dma_fence_put(fence);
return -EINVAL;
}
}
/**
- * amdgpu_cs_wait_all_fence - wait on all fences to signal
+ * amdgpu_cs_wait_all_fences - wait on all fences to signal
*
* @adev: amdgpu device
* @filp: file private
@@ -1515,15 +1650,15 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
continue;
r = dma_fence_wait_timeout(fence, true, timeout);
+ if (r > 0 && fence->error)
+ r = fence->error;
+
dma_fence_put(fence);
if (r < 0)
return r;
if (r == 0)
break;
-
- if (fence->error)
- return fence->error;
}
memset(wait, 0, sizeof(*wait));
@@ -1609,37 +1744,28 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
{
struct amdgpu_device *adev = drm_to_adev(dev);
union drm_amdgpu_wait_fences *wait = data;
- uint32_t fence_count = wait->in.fence_count;
- struct drm_amdgpu_fence *fences_user;
struct drm_amdgpu_fence *fences;
int r;
/* Get the fences from userspace */
- fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
- GFP_KERNEL);
- if (fences == NULL)
- return -ENOMEM;
-
- fences_user = u64_to_user_ptr(wait->in.fences);
- if (copy_from_user(fences, fences_user,
- sizeof(struct drm_amdgpu_fence) * fence_count)) {
- r = -EFAULT;
- goto err_free_fences;
- }
+ fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),
+ wait->in.fence_count,
+ sizeof(struct drm_amdgpu_fence));
+ if (IS_ERR(fences))
+ return PTR_ERR(fences);
if (wait->in.wait_all)
r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
else
r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
-err_free_fences:
kfree(fences);
return r;
}
/**
- * amdgpu_cs_find_bo_va - find bo_va for VM address
+ * amdgpu_cs_find_mapping - find bo_va for VM address
*
* @parser: command submission parser context
* @addr: VM address
@@ -1658,7 +1784,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va_mapping *mapping;
- int r;
+ int i, r;
addr /= AMDGPU_GPU_PAGE_SIZE;
@@ -1670,12 +1796,17 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
*map = mapping;
/* Double check that the BO is reserved by this CS */
- if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
+ if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
return -EINVAL;
- if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
- (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ /* Make sure VRAM is allocated contigiously */
+ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
+ !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+
amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+ for (i = 0; i < (*bo)->placement.num_placement; i++)
+ (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
new file mode 100644
index 000000000000..39c33ad100cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CS_H__
+#define __AMDGPU_CS_H__
+
+#include <linux/ww_mutex.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_job.h"
+#include "amdgpu_bo_list.h"
+#include "amdgpu_ring.h"
+
+#define AMDGPU_CS_GANG_SIZE 4
+
+struct amdgpu_bo_va_mapping;
+
+struct amdgpu_cs_chunk {
+ uint32_t chunk_id;
+ uint32_t length_dw;
+ void *kdata;
+};
+
+struct amdgpu_cs_post_dep {
+ struct drm_syncobj *syncobj;
+ struct dma_fence_chain *chain;
+ u64 point;
+};
+
+struct amdgpu_cs_parser {
+ struct amdgpu_device *adev;
+ struct drm_file *filp;
+ struct amdgpu_ctx *ctx;
+
+ /* chunks */
+ unsigned nchunks;
+ struct amdgpu_cs_chunk *chunks;
+
+ /* scheduler job objects */
+ unsigned int gang_size;
+ unsigned int gang_leader_idx;
+ struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *gang_leader;
+
+ /* buffer objects */
+ struct drm_exec exec;
+ struct amdgpu_bo_list *bo_list;
+ struct amdgpu_mn *mn;
+ struct dma_fence *fence;
+ uint64_t bytes_moved_threshold;
+ uint64_t bytes_moved_vis_threshold;
+ uint64_t bytes_moved;
+ uint64_t bytes_moved_vis;
+
+ /* user fence */
+ struct amdgpu_bo *uf_bo;
+
+ unsigned num_post_deps;
+ struct amdgpu_cs_post_dep *post_deps;
+
+ struct amdgpu_sync sync;
+};
+
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+ uint64_t addr, struct amdgpu_bo **bo,
+ struct amdgpu_bo_va_mapping **mapping);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index da21e60bb827..02138aa55793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -22,13 +22,14 @@
* * Author: Monk.liu@amd.com
*/
+#include <drm/drm_exec.h>
+
#include "amdgpu.h"
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
- uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+ uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev);
- addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
return addr;
@@ -65,31 +66,25 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size)
{
- struct ww_acquire_ctx ticket;
- struct list_head list;
- struct amdgpu_bo_list_entry pd;
- struct ttm_validate_buffer csa_tv;
+ struct drm_exec exec;
int r;
- INIT_LIST_HEAD(&list);
- INIT_LIST_HEAD(&csa_tv.head);
- csa_tv.bo = &bo->tbo;
- csa_tv.num_shared = 1;
-
- list_add(&csa_tv.head, &list);
- amdgpu_vm_get_pd_bo(vm, &list, &pd);
-
- r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
- if (r) {
- DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
- return r;
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ goto error;
+ }
}
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
if (!*bo_va) {
- ttm_eu_backoff_reservation(&ticket, &list);
- DRM_ERROR("failed to create bo_va for static CSA\n");
- return -ENOMEM;
+ r = -ENOMEM;
+ goto error;
}
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
@@ -98,11 +93,43 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
- amdgpu_vm_bo_rmv(adev, *bo_va);
- ttm_eu_backoff_reservation(&ticket, &list);
- return r;
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
}
- ttm_eu_backoff_reservation(&ticket, &list);
- return 0;
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
+
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr)
+{
+ struct drm_exec exec;
+ int r;
+
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ goto error;
+ }
+ }
+
+ r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
+ if (r) {
+ DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
+ goto error;
+ }
+
+ amdgpu_vm_bo_del(adev, bo_va);
+
+error:
+ drm_exec_fini(&exec);
+ return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
index 524b4437a021..7dfc1f2012eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
@@ -34,6 +34,9 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size);
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr);
void amdgpu_free_static_csa(struct amdgpu_bo **bo);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0350205c4897..afedea02188d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -23,6 +23,7 @@
*/
#include <drm/drm_auth.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
@@ -41,16 +42,66 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_DEC] = 1,
[AMDGPU_HW_IP_VCN_ENC] = 1,
[AMDGPU_HW_IP_VCN_JPEG] = 1,
+ [AMDGPU_HW_IP_VPE] = 1,
};
-static int amdgpu_ctx_priority_permit(struct drm_file *filp,
- enum drm_sched_priority priority)
+bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
- return -EINVAL;
+ switch (ctx_prio) {
+ case AMDGPU_CTX_PRIORITY_VERY_LOW:
+ case AMDGPU_CTX_PRIORITY_LOW:
+ case AMDGPU_CTX_PRIORITY_NORMAL:
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return true;
+ default:
+ case AMDGPU_CTX_PRIORITY_UNSET:
+ /* UNSET priority is not valid and we don't carry that
+ * around, but set it to NORMAL in the only place this
+ * function is called, amdgpu_ctx_ioctl().
+ */
+ return false;
+ }
+}
+
+static enum drm_sched_priority
+amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
+{
+ switch (ctx_prio) {
+ case AMDGPU_CTX_PRIORITY_UNSET:
+ pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
+ return DRM_SCHED_PRIORITY_NORMAL;
+
+ case AMDGPU_CTX_PRIORITY_VERY_LOW:
+ return DRM_SCHED_PRIORITY_LOW;
+
+ case AMDGPU_CTX_PRIORITY_LOW:
+ return DRM_SCHED_PRIORITY_LOW;
+
+ case AMDGPU_CTX_PRIORITY_NORMAL:
+ return DRM_SCHED_PRIORITY_NORMAL;
+
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ return DRM_SCHED_PRIORITY_HIGH;
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return DRM_SCHED_PRIORITY_HIGH;
+
+ /* This should not happen as we sanitized userspace provided priority
+ * already, WARN if this happens.
+ */
+ default:
+ WARN(1, "Invalid context priority %d\n", ctx_prio);
+ return DRM_SCHED_PRIORITY_NORMAL;
+ }
+
+}
+
+static int amdgpu_ctx_priority_permit(struct drm_file *filp,
+ int32_t priority)
+{
/* NORMAL and below are accessible by everyone */
- if (priority <= DRM_SCHED_PRIORITY_NORMAL)
+ if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
return 0;
if (capable(CAP_SYS_NICE))
@@ -62,26 +113,52 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
+static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
switch (prio) {
- case DRM_SCHED_PRIORITY_HIGH:
- case DRM_SCHED_PRIORITY_KERNEL:
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
return AMDGPU_GFX_PIPE_PRIO_HIGH;
default:
return AMDGPU_GFX_PIPE_PRIO_NORMAL;
}
}
-static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
- enum drm_sched_priority prio,
- u32 hw_ip)
+static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
+{
+ switch (prio) {
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ return AMDGPU_RING_PRIO_1;
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
+
+static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
+ struct amdgpu_device *adev = ctx->mgr->adev;
unsigned int hw_prio;
+ int32_t ctx_prio;
+
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+
+ switch (hw_ip) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_HW_IP_COMPUTE:
+ hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
+ break;
+ case AMDGPU_HW_IP_VCE:
+ case AMDGPU_HW_IP_VCN_ENC:
+ hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
+ break;
+ default:
+ hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+ break;
+ }
- hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
- amdgpu_ctx_sched_prio_to_compute_prio(prio) :
- AMDGPU_RING_PRIO_DEFAULT;
hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
hw_prio = AMDGPU_RING_PRIO_DEFAULT;
@@ -89,15 +166,50 @@ static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
return hw_prio;
}
+/* Calculate the time spend on the hw */
+static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
+{
+ struct drm_sched_fence *s_fence;
+
+ if (!fence)
+ return ns_to_ktime(0);
+
+ /* When the fence is not even scheduled it can't have spend time */
+ s_fence = to_drm_sched_fence(fence);
+ if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
+ return ns_to_ktime(0);
+
+ /* When it is still running account how much already spend */
+ if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
+ return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
+
+ return ktime_sub(s_fence->finished.timestamp,
+ s_fence->scheduled.timestamp);
+}
+
+static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
+ struct amdgpu_ctx_entity *centity)
+{
+ ktime_t res = ns_to_ktime(0);
+ uint32_t i;
+
+ spin_lock(&ctx->ring_lock);
+ for (i = 0; i < amdgpu_sched_jobs; i++) {
+ res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
+ }
+ spin_unlock(&ctx->ring_lock);
+ return res;
+}
+
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
- const u32 ring)
+ const u32 ring)
{
- struct amdgpu_device *adev = ctx->adev;
- struct amdgpu_ctx_entity *entity;
struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
- unsigned num_scheds = 0;
- unsigned int hw_prio;
- enum drm_sched_priority priority;
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ struct amdgpu_ctx_entity *entity;
+ enum drm_sched_priority drm_prio;
+ unsigned int hw_prio, num_scheds;
+ int32_t ctx_prio;
int r;
entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
@@ -105,14 +217,27 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
if (!entity)
return -ENOMEM;
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ entity->hw_ip = hw_ip;
entity->sequence = 1;
- priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
- ctx->init_priority : ctx->override_priority;
- hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
+ hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
+ drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
- scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
- num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+ if (!(adev)->xcp_mgr) {
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ } else {
+ struct amdgpu_fpriv *fpriv;
+
+ fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
+ r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
+ &num_scheds, &scheds);
+ if (r)
+ goto error_free_entity;
+ }
/* disable load balance if the hw engine retains context among dependent jobs */
if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
@@ -124,25 +249,79 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
num_scheds = 1;
}
- r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+ r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
&ctx->guilty);
if (r)
goto error_free_entity;
- ctx->entities[hw_ip][ring] = entity;
+ /* It's not an error if we fail to install the new entity */
+ if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
+ goto cleanup_entity;
+
return 0;
+cleanup_entity:
+ drm_sched_entity_fini(&entity->entity);
+
error_free_entity:
kfree(entity);
return r;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- enum drm_sched_priority priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
+{
+ ktime_t res = ns_to_ktime(0);
+ int i;
+
+ if (!entity)
+ return res;
+
+ for (i = 0; i < amdgpu_sched_jobs; ++i) {
+ res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
+ dma_fence_put(entity->fences[i]);
+ }
+
+ amdgpu_xcp_release_sched(adev, entity);
+
+ kfree(entity);
+ return res;
+}
+
+static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 *stable_pstate)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ enum amd_dpm_forced_level current_level;
+
+ current_level = amdgpu_dpm_get_performance_level(adev);
+
+ switch (current_level) {
+ case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
+ break;
+ default:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
+ break;
+ }
+ return 0;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
+ struct drm_file *filp, struct amdgpu_ctx *ctx)
{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ u32 current_stable_pstate;
int r;
r = amdgpu_ctx_priority_permit(filp, priority);
@@ -151,52 +330,104 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
-
kref_init(&ctx->refcount);
+ ctx->mgr = mgr;
spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
ctx->init_priority = priority;
- ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+ ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r)
+ return r;
+
+ if (mgr->adev->pm.stable_pstate_ctx)
+ ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
+ else
+ ctx->stable_pstate = current_stable_pstate;
+
+ ctx->ctx_mgr = &(fpriv->ctx_mgr);
return 0;
}
-static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 stable_pstate)
{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ enum amd_dpm_forced_level level;
+ u32 current_stable_pstate;
+ int r;
- int i;
+ mutex_lock(&adev->pm.stable_pstate_ctx_lock);
+ if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
+ r = -EBUSY;
+ goto done;
+ }
- if (!entity)
- return;
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r || (stable_pstate == current_stable_pstate))
+ goto done;
- for (i = 0; i < amdgpu_sched_jobs; ++i)
- dma_fence_put(entity->fences[i]);
+ switch (stable_pstate) {
+ case AMDGPU_CTX_STABLE_PSTATE_NONE:
+ level = AMD_DPM_FORCED_LEVEL_AUTO;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_PEAK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
+ break;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
- kfree(entity);
+ r = amdgpu_dpm_force_performance_level(adev, level);
+
+ if (level == AMD_DPM_FORCED_LEVEL_AUTO)
+ adev->pm.stable_pstate_ctx = NULL;
+ else
+ adev->pm.stable_pstate_ctx = ctx;
+done:
+ mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
+
+ return r;
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- struct amdgpu_device *adev = ctx->adev;
- unsigned i, j;
+ struct amdgpu_ctx_mgr *mgr = ctx->mgr;
+ struct amdgpu_device *adev = mgr->adev;
+ unsigned i, j, idx;
if (!adev)
return;
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
- amdgpu_ctx_fini_entity(ctx->entities[i][j]);
- ctx->entities[i][j] = NULL;
+ ktime_t spend;
+
+ spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
+ atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
}
}
- mutex_destroy(&ctx->lock);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+ drm_dev_exit(idx);
+ }
+
kfree(ctx);
}
@@ -204,6 +435,7 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
int r;
+ struct drm_sched_entity *ctx_entity;
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
@@ -227,14 +459,21 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return r;
}
- *entity = &ctx->entities[hw_ip][ring]->entity;
+ ctx_entity = &ctx->entities[hw_ip][ring]->entity;
+ r = drm_sched_entity_error(ctx_entity);
+ if (r) {
+ DRM_DEBUG("error entity %p\n", ctx_entity);
+ return r;
+ }
+
+ *entity = ctx_entity;
return 0;
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct drm_file *filp,
- enum drm_sched_priority priority,
+ int32_t priority,
uint32_t *id)
{
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
@@ -254,7 +493,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
}
*id = (uint32_t)r;
- r = amdgpu_ctx_init(adev, priority, filp, ctx);
+ r = amdgpu_ctx_init(mgr, priority, filp, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);
*id = 0;
@@ -331,13 +570,15 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
return 0;
}
+#define AMDGPU_RAS_COUNTE_DELAY_MS 3000
+
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
- struct amdgpu_fpriv *fpriv, uint32_t id,
- union drm_amdgpu_ctx_out *out)
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ union drm_amdgpu_ctx_out *out)
{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr;
- unsigned long ras_counter;
if (!fpriv)
return -EINVAL;
@@ -356,64 +597,129 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
- if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
+ if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
if (atomic_read(&ctx->guilty))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
- /*query ue count*/
- ras_counter = amdgpu_ras_query_error_count(adev, false);
- /*ras counter is monotonic increasing*/
- if (ras_counter != ctx->ras_counter_ue) {
- out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
- ctx->ras_counter_ue = ras_counter;
- }
+ if (amdgpu_in_reset(adev))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
+
+ if (adev->ras_enabled && con) {
+ /* Return the cached values in O(1),
+ * and schedule delayed work to cache
+ * new vaues.
+ */
+ int ce_count, ue_count;
- /*query ce count*/
- ras_counter = amdgpu_ras_query_error_count(adev, true);
- if (ras_counter != ctx->ras_counter_ce) {
- out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
- ctx->ras_counter_ce = ras_counter;
+ ce_count = atomic_read(&con->ras_ce_count);
+ ue_count = atomic_read(&con->ras_ue_count);
+
+ if (ce_count != ctx->ras_counter_ce) {
+ ctx->ras_counter_ce = ce_count;
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
+ }
+
+ if (ue_count != ctx->ras_counter_ue) {
+ ctx->ras_counter_ue = ue_count;
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
+ }
+
+ schedule_delayed_work(&con->ras_counte_delay_work,
+ msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
}
mutex_unlock(&mgr->lock);
return 0;
}
+static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ bool set, u32 *stable_pstate)
+{
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+ int r;
+
+ if (!fpriv)
+ return -EINVAL;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (!ctx) {
+ mutex_unlock(&mgr->lock);
+ return -EINVAL;
+ }
+
+ if (set)
+ r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
+ else
+ r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
+
+ mutex_unlock(&mgr->lock);
+ return r;
+}
+
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
int r;
- uint32_t id;
- enum drm_sched_priority priority;
+ uint32_t id, stable_pstate;
+ int32_t priority;
union drm_amdgpu_ctx *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
id = args->in.ctx_id;
- r = amdgpu_to_sched_priority(args->in.priority, &priority);
+ priority = args->in.priority;
- /* For backwards compatibility reasons, we need to accept
- * ioctls with garbage in the priority field */
- if (r == -EINVAL)
- priority = DRM_SCHED_PRIORITY_NORMAL;
+ /* For backwards compatibility, we need to accept ioctls with garbage
+ * in the priority field. Garbage values in the priority field, result
+ * in the priority being set to NORMAL.
+ */
+ if (!amdgpu_ctx_priority_is_valid(priority))
+ priority = AMDGPU_CTX_PRIORITY_NORMAL;
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_free(fpriv, id);
break;
case AMDGPU_CTX_OP_QUERY_STATE:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
break;
case AMDGPU_CTX_OP_QUERY_STATE2:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
break;
+ case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
+ if (args->in.flags)
+ return -EINVAL;
+ r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
+ if (!r)
+ args->out.pstate.flags = stable_pstate;
+ break;
+ case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
+ if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
+ return -EINVAL;
+ stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
+ if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
+ return -EINVAL;
+ r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
+ break;
default:
return -EINVAL;
}
@@ -448,9 +754,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
return 0;
}
-void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
- struct drm_sched_entity *entity,
- struct dma_fence *fence, uint64_t *handle)
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence)
{
struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
uint64_t seq = centity->sequence;
@@ -459,8 +765,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
idx = seq & (amdgpu_sched_jobs - 1);
other = centity->fences[idx];
- if (other)
- BUG_ON(!dma_fence_is_signaled(other));
+ WARN_ON(other && !dma_fence_is_signaled(other));
dma_fence_get(fence);
@@ -469,9 +774,11 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
centity->sequence++;
spin_unlock(&ctx->ring_lock);
+ atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
+ &ctx->mgr->time_spend[centity->hw_ip]);
+
dma_fence_put(other);
- if (handle)
- *handle = seq;
+ return seq;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
@@ -504,22 +811,22 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
}
static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
- struct amdgpu_ctx_entity *aentity,
- int hw_ip,
- enum drm_sched_priority priority)
+ struct amdgpu_ctx_entity *aentity,
+ int hw_ip,
+ int32_t priority)
{
- struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_device *adev = ctx->mgr->adev;
unsigned int hw_prio;
struct drm_gpu_scheduler **scheds = NULL;
unsigned num_scheds;
/* set sw priority */
- drm_sched_entity_set_priority(&aentity->entity, priority);
+ drm_sched_entity_set_priority(&aentity->entity,
+ amdgpu_ctx_to_drm_sched_prio(priority));
/* set hw priority */
- if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
- hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
- AMDGPU_HW_IP_COMPUTE);
+ if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
+ hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
@@ -529,14 +836,14 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
}
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
- enum drm_sched_priority priority)
+ int32_t priority)
{
- enum drm_sched_priority ctx_prio;
+ int32_t ctx_prio;
unsigned i, j;
ctx->override_priority = priority;
- ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
@@ -573,10 +880,17 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
return r;
}
-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+ struct amdgpu_device *adev)
{
+ unsigned int i;
+
+ mgr->adev = adev;
mutex_init(&mgr->lock);
- idr_init(&mgr->ctx_handles);
+ idr_init_base(&mgr->ctx_handles, 1);
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ atomic64_set(&mgr->time_spend[i], 0);
}
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
@@ -605,7 +919,7 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
return timeout;
}
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
@@ -630,24 +944,49 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
drm_sched_entity_fini(entity);
}
}
+ kref_put(&ctx->refcount, amdgpu_ctx_fini);
}
}
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
+ amdgpu_ctx_mgr_entity_fini(mgr);
+ idr_destroy(&mgr->ctx_handles);
+ mutex_destroy(&mgr->lock);
+}
+
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+ ktime_t usage[AMDGPU_HW_IP_NUM])
+{
struct amdgpu_ctx *ctx;
- struct idr *idp;
+ unsigned int hw_ip, i;
uint32_t id;
- amdgpu_ctx_mgr_entity_fini(mgr);
-
- idp = &mgr->ctx_handles;
+ /*
+ * This is a little bit racy because it can be that a ctx or a fence are
+ * destroyed just in the moment we try to account them. But that is ok
+ * since exactly that case is explicitely allowed by the interface.
+ */
+ mutex_lock(&mgr->lock);
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
- idr_for_each_entry(idp, ctx, id) {
- if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
- DRM_ERROR("ctx %p is still alive\n", ctx);
+ usage[hw_ip] = ns_to_ktime(ns);
}
- idr_destroy(&mgr->ctx_handles);
- mutex_destroy(&mgr->lock);
+ idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
+ struct amdgpu_ctx_entity *centity;
+ ktime_t spend;
+
+ centity = ctx->entities[hw_ip][i];
+ if (!centity)
+ continue;
+ spend = amdgpu_ctx_entity_time(ctx, centity);
+ usage[hw_ip] = ktime_add(usage[hw_ip], spend);
+ }
+ }
+ }
+ mutex_unlock(&mgr->lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index f54e10314661..090dfe86f75b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -23,15 +23,20 @@
#ifndef __AMDGPU_CTX_H__
#define __AMDGPU_CTX_H__
+#include <linux/ktime.h>
+#include <linux/types.h>
+
#include "amdgpu_ring.h"
struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
+struct amdgpu_ctx_mgr;
#define AMDGPU_MAX_ENTITY_NUM 4
struct amdgpu_ctx_entity {
+ uint32_t hw_ip;
uint64_t sequence;
struct drm_sched_entity entity;
struct dma_fence *fences[];
@@ -39,19 +44,20 @@ struct amdgpu_ctx_entity {
struct amdgpu_ctx {
struct kref refcount;
- struct amdgpu_device *adev;
+ struct amdgpu_ctx_mgr *mgr;
unsigned reset_counter;
unsigned reset_counter_query;
- uint32_t vram_lost_counter;
+ uint64_t generation;
spinlock_t ring_lock;
struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
bool preamble_presented;
- enum drm_sched_priority init_priority;
- enum drm_sched_priority override_priority;
- struct mutex lock;
+ int32_t init_priority;
+ int32_t override_priority;
atomic_t guilty;
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
+ uint32_t stable_pstate;
+ struct amdgpu_ctx_mgr *ctx_mgr;
};
struct amdgpu_ctx_mgr {
@@ -59,6 +65,7 @@ struct amdgpu_ctx_mgr {
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
+ atomic64_t time_spend[AMDGPU_HW_IP_NUM];
};
extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
@@ -68,14 +75,14 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity);
-void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
- struct drm_sched_entity *entity,
- struct dma_fence *fence, uint64_t *seq);
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
uint64_t seq);
-void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
- enum drm_sched_priority priority);
+bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, int32_t ctx_prio);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
@@ -83,9 +90,11 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity);
-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+ struct amdgpu_device *adev);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+ ktime_t usage[AMDGPU_HW_IP_NUM]);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 43059ead733b..62d43b8cbe58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -27,8 +27,6 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
-#include <linux/poll.h>
-#include <drm/drm_debugfs.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
@@ -37,126 +35,13 @@
#include "amdgpu_rap.h"
#include "amdgpu_securedisplay.h"
#include "amdgpu_fw_attestation.h"
+#include "amdgpu_umr.h"
-/**
- * amdgpu_debugfs_add_files - Add simple debugfs entries
- *
- * @adev: Device to attach debugfs entries to
- * @files: Array of function callbacks that respond to reads
- * @nfiles: Number of callbacks to register
- *
- */
-int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
- const struct drm_info_list *files,
- unsigned nfiles)
-{
- unsigned i;
-
- for (i = 0; i < adev->debugfs_count; i++) {
- if (adev->debugfs[i].files == files) {
- /* Already registered */
- return 0;
- }
- }
-
- i = adev->debugfs_count + 1;
- if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) {
- DRM_ERROR("Reached maximum number of debugfs components.\n");
- DRM_ERROR("Report so we increase "
- "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n");
- return -EINVAL;
- }
- adev->debugfs[adev->debugfs_count].files = files;
- adev->debugfs[adev->debugfs_count].num_files = nfiles;
- adev->debugfs_count = i;
-#if defined(CONFIG_DEBUG_FS)
- drm_debugfs_create_files(files, nfiles,
- adev_to_drm(adev)->primary->debugfs_root,
- adev_to_drm(adev)->primary);
-#endif
- return 0;
-}
-
-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
-{
-#if defined(CONFIG_DEBUG_FS)
- unsigned long timeout = 600 * HZ;
- int ret;
-
- wake_up_interruptible(&adev->autodump.gpu_hang);
-
- ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
- if (ret == 0) {
- pr_err("autodump: timeout, move on to gpu recovery\n");
- return -ETIMEDOUT;
- }
-#endif
- return 0;
-}
+#include "amdgpu_reset.h"
+#include "amdgpu_psp_ta.h"
#if defined(CONFIG_DEBUG_FS)
-static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
-{
- struct amdgpu_device *adev = inode->i_private;
- int ret;
-
- file->private_data = adev;
-
- ret = down_read_killable(&adev->reset_sem);
- if (ret)
- return ret;
-
- if (adev->autodump.dumping.done) {
- reinit_completion(&adev->autodump.dumping);
- ret = 0;
- } else {
- ret = -EBUSY;
- }
-
- up_read(&adev->reset_sem);
-
- return ret;
-}
-
-static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
-{
- struct amdgpu_device *adev = file->private_data;
-
- complete_all(&adev->autodump.dumping);
- return 0;
-}
-
-static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
-{
- struct amdgpu_device *adev = file->private_data;
-
- poll_wait(file, &adev->autodump.gpu_hang, poll_table);
-
- if (amdgpu_in_reset(adev))
- return POLLIN | POLLRDNORM | POLLWRNORM;
-
- return 0;
-}
-
-static const struct file_operations autodump_debug_fops = {
- .owner = THIS_MODULE,
- .open = amdgpu_debugfs_autodump_open,
- .poll = amdgpu_debugfs_autodump_poll,
- .release = amdgpu_debugfs_autodump_release,
-};
-
-static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
-{
- init_completion(&adev->autodump.dumping);
- complete_all(&adev->autodump.dumping);
- init_waitqueue_head(&adev->autodump.gpu_hang);
-
- debugfs_create_file("amdgpu_autodump", 0600,
- adev_to_drm(adev)->primary->debugfs_root,
- adev, &autodump_debug_fops);
-}
-
/**
* amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
*
@@ -171,14 +56,14 @@ static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
*
* Bit 62: Indicates a GRBM bank switch is needed
* Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
- * zero)
+ * zero)
* Bits 24..33: The SE or ME selector if needed
* Bits 34..43: The SH (or SA) or PIPE selector if needed
* Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
*
* Bit 23: Indicates that the PM power gating lock should be held
- * This is necessary to read registers that might be
- * unreliable during a power gating transistion.
+ * This is necessary to read registers that might be
+ * unreliable during a power gating transistion.
*
* The lower bits are the BYTE offset of the register to read. This
* allows reading multiple registers in a single call and having
@@ -191,7 +76,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
ssize_t result = 0;
int r;
bool pm_pg_lock, use_bank, use_ring;
- unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
+ unsigned int instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
pm_pg_lock = use_bank = use_ring = false;
instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;
@@ -244,17 +129,16 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
(se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return -EINVAL;
}
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se_bank,
- sh_bank, instance_bank);
+ sh_bank, instance_bank, 0);
} else if (use_ring) {
mutex_lock(&adev->srbm_mutex);
- amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid);
+ amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0);
}
if (pm_pg_lock)
@@ -269,7 +153,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
- amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value);
+ amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0);
}
if (r) {
result = r;
@@ -284,17 +168,16 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
end:
if (use_bank) {
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
} else if (use_ring) {
- amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
if (pm_pg_lock)
mutex_unlock(&adev->pm.mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
@@ -319,6 +202,301 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
}
+static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_regs2_data *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+ rd->adev = file_inode(file)->i_private;
+ file->private_data = rd;
+ mutex_init(&rd->lock);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_regs2_data *rd = file->private_data;
+
+ mutex_destroy(&rd->lock);
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 offset, size_t size, int write_en)
+{
+ struct amdgpu_debugfs_regs2_data *rd = f->private_data;
+ struct amdgpu_device *adev = rd->adev;
+ ssize_t result = 0;
+ int r;
+ uint32_t value;
+
+ if (size & 0x3 || offset & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ mutex_lock(&rd->lock);
+
+ if (rd->id.use_grbm) {
+ if ((rd->id.grbm.sh != 0xFFFFFFFF && rd->id.grbm.sh >= adev->gfx.config.max_sh_per_se) ||
+ (rd->id.grbm.se != 0xFFFFFFFF && rd->id.grbm.se >= adev->gfx.config.max_shader_engines)) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ mutex_unlock(&rd->lock);
+ return -EINVAL;
+ }
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se,
+ rd->id.grbm.sh,
+ rd->id.grbm.instance, rd->id.xcc_id);
+ }
+
+ if (rd->id.use_srbm) {
+ mutex_lock(&adev->srbm_mutex);
+ amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe,
+ rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id);
+ }
+
+ if (rd->id.pg_lock)
+ mutex_lock(&adev->pm.mutex);
+
+ while (size) {
+ if (!write_en) {
+ value = RREG32(offset >> 2);
+ r = put_user(value, (uint32_t *)buf);
+ } else {
+ r = get_user(value, (uint32_t *)buf);
+ if (!r)
+ amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value, rd->id.xcc_id);
+ }
+ if (r) {
+ result = r;
+ goto end;
+ }
+ offset += 4;
+ size -= 4;
+ result += 4;
+ buf += 4;
+ }
+end:
+ if (rd->id.use_grbm) {
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+
+ if (rd->id.use_srbm) {
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ if (rd->id.pg_lock)
+ mutex_unlock(&adev->pm.mutex);
+
+ mutex_unlock(&rd->lock);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+}
+
+static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data)
+{
+ struct amdgpu_debugfs_regs2_data *rd = f->private_data;
+ struct amdgpu_debugfs_regs2_iocdata v1_data;
+ int r;
+
+ mutex_lock(&rd->lock);
+
+ switch (cmd) {
+ case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2:
+ r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data,
+ sizeof(rd->id));
+ if (r)
+ r = -EINVAL;
+ goto done;
+ case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
+ r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data,
+ sizeof(v1_data));
+ if (r) {
+ r = -EINVAL;
+ goto done;
+ }
+ goto v1_copy;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+v1_copy:
+ rd->id.use_srbm = v1_data.use_srbm;
+ rd->id.use_grbm = v1_data.use_grbm;
+ rd->id.pg_lock = v1_data.pg_lock;
+ rd->id.grbm.se = v1_data.grbm.se;
+ rd->id.grbm.sh = v1_data.grbm.sh;
+ rd->id.grbm.instance = v1_data.grbm.instance;
+ rd->id.srbm.me = v1_data.srbm.me;
+ rd->id.srbm.pipe = v1_data.srbm.pipe;
+ rd->id.srbm.queue = v1_data.srbm.queue;
+ rd->id.xcc_id = 0;
+done:
+ mutex_unlock(&rd->lock);
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_regs2_op(f, buf, *pos, size, 0);
+}
+
+static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf, size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1);
+}
+
+static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+ rd->adev = file_inode(file)->i_private;
+ file->private_data = rd;
+ mutex_init(&rd->lock);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = file->private_data;
+
+ mutex_destroy(&rd->lock);
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ struct amdgpu_device *adev = rd->adev;
+ ssize_t result = 0;
+ int r;
+ uint32_t *data, x;
+
+ if (size > 4096 || size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -ENOMEM;
+ }
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id);
+
+ if (!rd->id.gpr_or_wave) {
+ x = 0;
+ if (adev->gfx.funcs->read_wave_data)
+ adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x);
+ } else {
+ x = size >> 2;
+ if (rd->id.gpr.vpgr_or_sgpr) {
+ if (adev->gfx.funcs->read_wave_vgprs)
+ adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data);
+ } else {
+ if (adev->gfx.funcs->read_wave_sgprs)
+ adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data);
+ }
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (!x) {
+ result = -EINVAL;
+ goto done;
+ }
+
+ while (size && (*pos < x * 4)) {
+ uint32_t value;
+
+ value = data[*pos >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto done;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+done:
+ amdgpu_virt_disable_access_debugfs(adev);
+ kfree(data);
+ return result;
+}
+
+static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ int r = 0;
+
+ mutex_lock(&rd->lock);
+
+ switch (cmd) {
+ case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE:
+ if (copy_from_user(&rd->id,
+ (struct amdgpu_debugfs_gprwave_iocdata *)data,
+ sizeof(rd->id)))
+ r = -EFAULT;
+ goto done;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+done:
+ mutex_unlock(&rd->lock);
+ return r;
+}
+
+
+
/**
* amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
@@ -357,14 +535,14 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;
- value = RREG32_PCIE(*pos);
+ if (upper_32_bits(*pos))
+ value = RREG32_PCIE_EXT(*pos);
+ else
+ value = RREG32_PCIE(*pos);
+
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -372,11 +550,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -417,14 +595,13 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
- WREG32_PCIE(*pos, value);
+ if (upper_32_bits(*pos))
+ WREG32_PCIE_EXT(*pos, value);
+ else
+ WREG32_PCIE(*pos, value);
result += 4;
buf += 4;
@@ -432,11 +609,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -461,6 +638,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ if (!adev->didt_rreg)
+ return -EOPNOTSUPP;
+
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -478,12 +658,8 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
value = RREG32_DIDT(*pos >> 2);
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -491,11 +667,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -520,6 +696,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ if (!adev->didt_wreg)
+ return -EOPNOTSUPP;
+
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -536,12 +715,8 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
WREG32_DIDT(*pos >> 2, value);
@@ -551,11 +726,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -577,6 +752,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
ssize_t result = 0;
int r;
+ if (!adev->smc_rreg)
+ return -EOPNOTSUPP;
+
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
@@ -597,12 +775,8 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
value = RREG32_SMC(*pos);
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -610,11 +784,11 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -636,6 +810,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
ssize_t result = 0;
int r;
+ if (!adev->smc_wreg)
+ return -EOPNOTSUPP;
+
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
@@ -655,12 +832,8 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
WREG32_SMC(*pos, value);
@@ -670,11 +843,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -707,7 +880,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
return -ENOMEM;
/* version, increment each time something is added */
- config[no_regs++] = 3;
+ config[no_regs++] = 5;
config[no_regs++] = adev->gfx.config.max_shader_engines;
config[no_regs++] = adev->gfx.config.max_tile_pipes;
config[no_regs++] = adev->gfx.config.max_cu_per_sh;
@@ -735,7 +908,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
/* rev==1 */
config[no_regs++] = adev->rev_id;
config[no_regs++] = adev->pg_flags;
- config[no_regs++] = adev->cg_flags;
+ config[no_regs++] = lower_32_bits(adev->cg_flags);
/* rev==2 */
config[no_regs++] = adev->family;
@@ -747,6 +920,13 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
config[no_regs++] = adev->pdev->subsystem_device;
config[no_regs++] = adev->pdev->subsystem_vendor;
+ /* rev==4 APU flag */
+ config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0;
+
+ /* rev==5 PG/CG flag upper 32bit */
+ config[no_regs++] = 0;
+ config[no_regs++] = upper_32_bits(adev->cg_flags);
+
while (size && (*pos < no_regs * 4)) {
uint32_t value;
@@ -812,7 +992,6 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (r) {
@@ -850,7 +1029,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
* The offset being sought changes which wave that the status data
* will be returned for. The bits are used as follows:
*
- * Bits 0..6: Byte offset into data
+ * Bits 0..6: Byte offset into data
* Bits 7..14: SE selector
* Bits 15..22: SH/SA selector
* Bits 23..30: CU/{WGP+SIMD} selector
@@ -894,16 +1073,15 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
- amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
x = 0;
if (adev->gfx.funcs->read_wave_data)
- adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
+ adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x);
- amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (!x) {
@@ -988,20 +1166,19 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
- amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
if (bank == 0) {
if (adev->gfx.funcs->read_wave_vgprs)
- adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
+ adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data);
} else {
if (adev->gfx.funcs->read_wave_sgprs)
- adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
+ adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data);
}
- amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
while (size) {
@@ -1030,7 +1207,155 @@ err:
}
/**
- * amdgpu_debugfs_regs_gfxoff_write - Enable/disable GFXOFF
+ * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * Read the last residency value logged. It doesn't auto update, one needs to
+ * stop logging before getting the current value.
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = amdgpu_get_gfx_off_residency(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ amdgpu_set_gfx_off_residency(adev, value ? true : false);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+
+/**
+ * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u64 value = 0;
+
+ r = amdgpu_get_gfx_off_entrycount(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (u64 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
*
* @f: open file handle
* @buf: User buffer to write data from
@@ -1059,11 +1384,8 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ if (r)
+ goto out;
amdgpu_gfx_off_ctrl(adev, value ? true : false);
@@ -1073,15 +1395,16 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return result;
+ return r;
}
/**
- * amdgpu_debugfs_regs_gfxoff_status - read gfxoff status
+ * amdgpu_debugfs_gfxoff_read - read gfxoff status
*
* @f: open file handle
* @buf: User buffer to store read data in
@@ -1099,25 +1422,57 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
return -EINVAL;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
+ }
while (size) {
- uint32_t value;
+ u32 value = adev->gfx.gfx_off_state;
+
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
r = amdgpu_get_gfx_off_status(adev, &value);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ if (r)
+ goto out;
- r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -1125,12 +1480,32 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return result;
+ return r;
}
+static const struct file_operations amdgpu_debugfs_regs2_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl,
+ .read = amdgpu_debugfs_regs2_read,
+ .write = amdgpu_debugfs_regs2_write,
+ .open = amdgpu_debugfs_regs2_open,
+ .release = amdgpu_debugfs_regs2_release,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gprwave_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl,
+ .read = amdgpu_debugfs_gprwave_read,
+ .open = amdgpu_debugfs_gprwave_open,
+ .release = amdgpu_debugfs_gprwave_release,
+ .llseek = default_llseek
+};
+
static const struct file_operations amdgpu_debugfs_regs_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_regs_read,
@@ -1186,8 +1561,29 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
.llseek = default_llseek
};
+static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_status_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_count_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_residency_read,
+ .write = amdgpu_debugfs_gfxoff_residency_write,
+ .llseek = default_llseek
+};
+
static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops,
+ &amdgpu_debugfs_regs2_fops,
+ &amdgpu_debugfs_gprwave_fops,
&amdgpu_debugfs_regs_didt_fops,
&amdgpu_debugfs_regs_pcie_fops,
&amdgpu_debugfs_regs_smc_fops,
@@ -1196,10 +1592,15 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_wave_fops,
&amdgpu_debugfs_gpr_fops,
&amdgpu_debugfs_gfxoff_fops,
+ &amdgpu_debugfs_gfxoff_status_fops,
+ &amdgpu_debugfs_gfxoff_count_fops,
+ &amdgpu_debugfs_gfxoff_residency_fops,
};
-static const char *debugfs_regs_names[] = {
+static const char * const debugfs_regs_names[] = {
"amdgpu_regs",
+ "amdgpu_regs2",
+ "amdgpu_gprwave",
"amdgpu_regs_didt",
"amdgpu_regs_pcie",
"amdgpu_regs_smc",
@@ -1208,11 +1609,14 @@ static const char *debugfs_regs_names[] = {
"amdgpu_wave",
"amdgpu_gpr",
"amdgpu_gfxoff",
+ "amdgpu_gfxoff_status",
+ "amdgpu_gfxoff_count",
+ "amdgpu_gfxoff_residency",
};
/**
* amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
- * register access.
+ * register access.
*
* @adev: The device to attach the debugfs entries to
*/
@@ -1224,31 +1628,29 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
ent = debugfs_create_file(debugfs_regs_names[i],
- S_IFREG | S_IRUGO, root,
+ S_IFREG | 0400, root,
adev, debugfs_regs[i]);
if (!i && !IS_ERR_OR_NULL(ent))
i_size_write(ent->d_inode, adev->rmmio_size);
- adev->debugfs_regs[i] = ent;
}
return 0;
}
-static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
+static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
{
- struct drm_info_node *node = (struct drm_info_node *) m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
int r = 0, i;
r = pm_runtime_get_sync(dev->dev);
if (r < 0) {
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(dev->dev);
return r;
}
/* Avoid accidently unparking the sched thread during GPU reset */
- r = down_read_killable(&adev->reset_sem);
+ r = down_write_killable(&adev->reset_domain->sem);
if (r)
return r;
@@ -1256,94 +1658,96 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- kthread_park(ring->sched.thread);
+ drm_sched_wqueue_stop(&ring->sched);
}
- seq_printf(m, "run ib test:\n");
+ seq_puts(m, "run ib test:\n");
r = amdgpu_ib_ring_tests(adev);
if (r)
seq_printf(m, "ib ring tests failed (%d).\n", r);
else
- seq_printf(m, "ib ring tests passed.\n");
+ seq_puts(m, "ib ring tests passed.\n");
/* go on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- kthread_unpark(ring->sched.thread);
+ drm_sched_wqueue_start(&ring->sched);
}
- up_read(&adev->reset_sem);
+ up_write(&adev->reset_domain->sem);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
}
-static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data)
+static int amdgpu_debugfs_evict_vram(void *data, u64 *val)
{
- struct drm_info_node *node = (struct drm_info_node *) m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ *val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+
+ pm_runtime_put_autosuspend(dev->dev);
- seq_write(m, adev->bios, adev->bios_size);
return 0;
}
-static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
+
+static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
{
- struct drm_info_node *node = (struct drm_info_node *)m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
int r;
r = pm_runtime_get_sync(dev->dev);
if (r < 0) {
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(dev->dev);
return r;
}
- seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
+ *val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
}
-static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
+static int amdgpu_debugfs_benchmark(void *data, u64 val)
{
- struct drm_info_node *node = (struct drm_info_node *)m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct ttm_resource_manager *man;
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
int r;
r = pm_runtime_get_sync(dev->dev);
if (r < 0) {
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(dev->dev);
return r;
}
- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
- r = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
- seq_printf(m, "(%d)\n", r);
+ r = amdgpu_benchmark(adev, val);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
- return 0;
+ return r;
}
-static int amdgpu_debugfs_vm_info(struct seq_file *m, void *data)
+static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
{
- struct drm_info_node *node = (struct drm_info_node *)m->private;
- struct drm_device *dev = node->minor->dev;
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
struct drm_file *file;
int r;
@@ -1354,14 +1758,19 @@ static int amdgpu_debugfs_vm_info(struct seq_file *m, void *data)
list_for_each_entry(file, &dev->filelist, lhead) {
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_task_info *ti;
+
+ ti = amdgpu_vm_get_task_info_vm(vm);
+ if (ti) {
+ seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->task.pid, ti->process_name);
+ amdgpu_vm_put_task_info(ti);
+ }
- seq_printf(m, "pid:%d\tProcess:%s ----------\n",
- vm->task_info.pid, vm->task_info.process_name);
- r = amdgpu_bo_reserve(vm->root.base.bo, true);
+ r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
break;
amdgpu_debugfs_vm_bo_info(vm, m);
- amdgpu_bo_unreserve(vm->root.base.bo);
+ amdgpu_bo_unreserve(vm->root.bo);
}
mutex_unlock(&dev->filelist_mutex);
@@ -1369,13 +1778,14 @@ static int amdgpu_debugfs_vm_info(struct seq_file *m, void *data)
return r;
}
-static const struct drm_info_list amdgpu_debugfs_list[] = {
- {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
- {"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
- {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
- {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
- {"amdgpu_vm_info", &amdgpu_debugfs_vm_info},
-};
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_test_ib);
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_vm_info);
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_vram_fops, amdgpu_debugfs_evict_vram,
+ NULL, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_gtt_fops, amdgpu_debugfs_evict_gtt,
+ NULL, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_benchmark_fops, NULL, amdgpu_debugfs_benchmark,
+ "%lld\n");
static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
struct dma_fence **fences)
@@ -1468,7 +1878,7 @@ no_preempt:
continue;
}
job = to_amdgpu_job(s_job);
- if (preempted && job->fence == fence)
+ if (preempted && (&job->hw_fence->base) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
@@ -1477,7 +1887,7 @@ no_preempt:
static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
{
- int r, resched, length;
+ int r, length;
struct amdgpu_ring *ring;
struct dma_fence **fences = NULL;
struct amdgpu_device *adev = (struct amdgpu_device *)data;
@@ -1487,7 +1897,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
ring = adev->rings[val];
- if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring) ||
+ !ring->funcs->preempt_ib)
return -EINVAL;
/* the last preemption failed */
@@ -1500,14 +1911,12 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
return -ENOMEM;
/* Avoid accidently unparking the sched thread during GPU reset */
- r = down_read_killable(&adev->reset_sem);
+ r = down_read_killable(&adev->reset_domain->sem);
if (r)
goto pro_end;
/* stop the scheduler */
- kthread_park(ring->sched.thread);
-
- resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+ drm_sched_wqueue_stop(&ring->sched);
/* preempt the IB */
r = amdgpu_ring_preempt_ib(ring);
@@ -1541,11 +1950,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
failure:
/* restart the scheduler */
- kthread_unpark(ring->sched.thread);
-
- up_read(&adev->reset_sem);
+ drm_sched_wqueue_start(&ring->sched);
- ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
+ up_read(&adev->reset_domain->sem);
pro_end:
kfree(fences);
@@ -1559,7 +1966,7 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
uint32_t max_freq, min_freq;
struct amdgpu_device *adev = (struct amdgpu_device *)data;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ if (amdgpu_sriov_multi_vf_mode(adev))
return -EINVAL;
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -1568,89 +1975,77 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
return ret;
}
- if (is_support_sw_smu(adev)) {
- ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq);
- if (ret || val > max_freq || val < min_freq)
- return -EINVAL;
- ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val);
- } else {
- return 0;
+ ret = amdgpu_dpm_get_dpm_freq_range(adev, PP_SCLK, &min_freq, &max_freq);
+ if (ret == -EOPNOTSUPP) {
+ ret = 0;
+ goto out;
+ }
+ if (ret || val > max_freq || val < min_freq) {
+ ret = -EINVAL;
+ goto out;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
+ ret = amdgpu_dpm_set_soft_freq_range(adev, PP_SCLK, (uint32_t)val, (uint32_t)val);
if (ret)
- return -EINVAL;
+ ret = -EINVAL;
- return 0;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return ret;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
amdgpu_debugfs_ib_preempt, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL,
+DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
amdgpu_debugfs_sclk_set, "%llu\n");
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
+ struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+ struct dentry *ent;
int r, i;
- adev->debugfs_preempt =
- debugfs_create_file("amdgpu_preempt_ib", 0600,
- adev_to_drm(adev)->primary->debugfs_root, adev,
- &fops_ib_preempt);
- if (!(adev->debugfs_preempt)) {
+ if (!debugfs_initialized())
+ return 0;
+
+ debugfs_create_x32("amdgpu_smu_debug", 0600, root,
+ &adev->pm.smu_debug_mask);
+
+ ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
+ &fops_ib_preempt);
+ if (IS_ERR(ent)) {
DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
- return -EIO;
+ return PTR_ERR(ent);
}
- adev->smu.debugfs_sclk =
- debugfs_create_file("amdgpu_force_sclk", 0200,
- adev_to_drm(adev)->primary->debugfs_root, adev,
- &fops_sclk_set);
- if (!(adev->smu.debugfs_sclk)) {
+ ent = debugfs_create_file("amdgpu_force_sclk", 0200, root, adev,
+ &fops_sclk_set);
+ if (IS_ERR(ent)) {
DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n");
- return -EIO;
+ return PTR_ERR(ent);
}
/* Register debugfs entries for amdgpu_ttm */
- r = amdgpu_ttm_debugfs_init(adev);
- if (r) {
- DRM_ERROR("Failed to init debugfs\n");
- return r;
- }
-
- r = amdgpu_debugfs_pm_init(adev);
- if (r) {
- DRM_ERROR("Failed to register debugfs file for dpm!\n");
- return r;
- }
-
- if (amdgpu_debugfs_sa_init(adev)) {
- dev_err(adev->dev, "failed to register debugfs file for SA\n");
- }
-
- if (amdgpu_debugfs_fence_init(adev))
- dev_err(adev->dev, "fence debugfs file creation failed\n");
-
- r = amdgpu_debugfs_gem_init(adev);
- if (r)
- DRM_ERROR("registering gem debugfs failed (%d).\n", r);
+ amdgpu_ttm_debugfs_init(adev);
+ amdgpu_debugfs_pm_init(adev);
+ amdgpu_debugfs_sa_init(adev);
+ amdgpu_debugfs_fence_init(adev);
+ amdgpu_debugfs_gem_init(adev);
r = amdgpu_debugfs_regs_init(adev);
if (r)
DRM_ERROR("registering register debugfs failed (%d).\n", r);
- r = amdgpu_debugfs_firmware_init(adev);
- if (r)
- DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
+ amdgpu_debugfs_firmware_init(adev);
+ amdgpu_ta_if_debugfs_init(adev);
+
+ amdgpu_debugfs_mes_event_log_init(adev);
#if defined(CONFIG_DRM_AMD_DC)
- if (amdgpu_device_has_dc_support(adev)) {
- if (dtn_debugfs_init(adev))
- DRM_ERROR("amdgpu: failed initialize dtn debugfs support.\n");
- }
+ if (adev->dc_enabled)
+ dtn_debugfs_init(adev);
#endif
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1659,23 +2054,110 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
if (!ring)
continue;
- if (amdgpu_debugfs_ring_init(adev, ring)) {
- DRM_ERROR("Failed to register debugfs file for rings !\n");
- }
+ amdgpu_debugfs_ring_init(adev, ring);
}
- amdgpu_ras_debugfs_create_all(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (!amdgpu_vcnfw_log)
+ break;
- amdgpu_debugfs_autodump_init(adev);
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
- amdgpu_rap_debugfs_init(adev);
+ amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]);
+ }
- amdgpu_securedisplay_debugfs_init(adev);
+ if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog)
+ amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm);
+ amdgpu_debugfs_vcn_sched_mask_init(adev);
+ amdgpu_debugfs_jpeg_sched_mask_init(adev);
+ amdgpu_debugfs_gfx_sched_mask_init(adev);
+ amdgpu_debugfs_compute_sched_mask_init(adev);
+ amdgpu_debugfs_sdma_sched_mask_init(adev);
+
+ amdgpu_ras_debugfs_create_all(adev);
+ amdgpu_rap_debugfs_init(adev);
+ amdgpu_securedisplay_debugfs_init(adev);
amdgpu_fw_attestation_debugfs_init(adev);
+ amdgpu_psp_debugfs_init(adev);
+
+ debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
+ &amdgpu_evict_vram_fops);
+ debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev,
+ &amdgpu_evict_gtt_fops);
+ debugfs_create_file("amdgpu_test_ib", 0400, root, adev,
+ &amdgpu_debugfs_test_ib_fops);
+ debugfs_create_file("amdgpu_vm_info", 0444, root, adev,
+ &amdgpu_debugfs_vm_info_fops);
+ debugfs_create_file("amdgpu_benchmark", 0200, root, adev,
+ &amdgpu_benchmark_fops);
+
+ adev->debugfs_vbios_blob.data = adev->bios;
+ adev->debugfs_vbios_blob.size = adev->bios_size;
+ debugfs_create_blob("amdgpu_vbios", 0444, root,
+ &adev->debugfs_vbios_blob);
+
+ if (adev->discovery.debugfs_blob.size)
+ debugfs_create_blob("amdgpu_discovery", 0444, root,
+ &adev->discovery.debugfs_blob);
- return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
- ARRAY_SIZE(amdgpu_debugfs_list));
+ return 0;
+}
+
+static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
+{
+ struct drm_file *file;
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_bo *root_bo;
+ struct amdgpu_device *adev;
+ int r;
+
+ file = m->private;
+ if (!file)
+ return -EINVAL;
+
+ adev = drm_to_adev(file->minor->dev);
+ fpriv = file->driver_priv;
+ if (!fpriv || !fpriv->vm.root.bo)
+ return -ENODEV;
+
+ root_bo = amdgpu_bo_ref(fpriv->vm.root.bo);
+ r = amdgpu_bo_reserve(root_bo, true);
+ if (r) {
+ amdgpu_bo_unref(&root_bo);
+ return -EINVAL;
+ }
+
+ seq_printf(m, "pd_address: 0x%llx\n", amdgpu_gmc_pd_addr(fpriv->vm.root.bo));
+ seq_printf(m, "max_pfn: 0x%llx\n", adev->vm_manager.max_pfn);
+ seq_printf(m, "num_level: 0x%x\n", adev->vm_manager.num_level);
+ seq_printf(m, "block_size: 0x%x\n", adev->vm_manager.block_size);
+ seq_printf(m, "fragment_size: 0x%x\n", adev->vm_manager.fragment_size);
+
+ amdgpu_bo_unreserve(root_bo);
+ amdgpu_bo_unref(&root_bo);
+
+ return 0;
+}
+
+static int amdgpu_pt_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_pt_info_read, inode->i_private);
+}
+
+static const struct file_operations amdgpu_pt_info_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_pt_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void amdgpu_debugfs_vm_init(struct drm_file *file)
+{
+ debugfs_create_file("vm_pagetable_info", 0444, file->debugfs_client, file,
+ &amdgpu_pt_info_fops);
}
#else
@@ -1687,4 +2169,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
return 0;
}
+void amdgpu_debugfs_vm_init(struct drm_file *file)
+{
+}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 2803884d338d..e7b3c38e5186 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -22,27 +22,16 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-
/*
* Debugfs
*/
-struct amdgpu_debugfs {
- const struct drm_info_list *files;
- unsigned num_files;
-};
-
-struct amdgpu_autodump {
- struct completion dumping;
- struct wait_queue_head gpu_hang;
-};
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
void amdgpu_debugfs_fini(struct amdgpu_device *adev);
-int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
- const struct drm_info_list *files,
- unsigned nfiles);
-int amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
-int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
-int amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
+void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_vm_init(struct drm_file *file);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
new file mode 100644
index 000000000000..4e2fe6674db8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/devcoredump.h>
+#include "amdgpu_dev_coredump.h"
+#include "atom.h"
+
+#ifndef CONFIG_DEV_COREDUMP
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job)
+{
+}
+#else
+
+const char *hw_ip_names[MAX_HWIP] = {
+ [GC_HWIP] = "GC",
+ [HDP_HWIP] = "HDP",
+ [SDMA0_HWIP] = "SDMA0",
+ [SDMA1_HWIP] = "SDMA1",
+ [SDMA2_HWIP] = "SDMA2",
+ [SDMA3_HWIP] = "SDMA3",
+ [SDMA4_HWIP] = "SDMA4",
+ [SDMA5_HWIP] = "SDMA5",
+ [SDMA6_HWIP] = "SDMA6",
+ [SDMA7_HWIP] = "SDMA7",
+ [LSDMA_HWIP] = "LSDMA",
+ [MMHUB_HWIP] = "MMHUB",
+ [ATHUB_HWIP] = "ATHUB",
+ [NBIO_HWIP] = "NBIO",
+ [MP0_HWIP] = "MP0",
+ [MP1_HWIP] = "MP1",
+ [UVD_HWIP] = "UVD/JPEG/VCN",
+ [VCN1_HWIP] = "VCN1",
+ [VCE_HWIP] = "VCE",
+ [VPE_HWIP] = "VPE",
+ [DF_HWIP] = "DF",
+ [DCE_HWIP] = "DCE",
+ [OSSSYS_HWIP] = "OSSSYS",
+ [SMUIO_HWIP] = "SMUIO",
+ [PWR_HWIP] = "PWR",
+ [NBIF_HWIP] = "NBIF",
+ [THM_HWIP] = "THM",
+ [CLK_HWIP] = "CLK",
+ [UMC_HWIP] = "UMC",
+ [RSMU_HWIP] = "RSMU",
+ [XGMI_HWIP] = "XGMI",
+ [DCI_HWIP] = "DCI",
+ [PCIE_HWIP] = "PCIE",
+};
+
+static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
+ struct drm_printer *p)
+{
+ uint32_t version;
+ uint32_t feature;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
+ adev->vce.fb_version, adev->vce.fw_version);
+ drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0,
+ adev->uvd.fw_version);
+ drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gmc.fw_version);
+ drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.me_feature_version, adev->gfx.me_fw_version);
+ drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
+ drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
+ drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
+
+ drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlc_feature_version,
+ adev->gfx.rlc_srlc_fw_version);
+ drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlg_feature_version,
+ adev->gfx.rlc_srlg_fw_version);
+ drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srls_feature_version,
+ adev->gfx.rlc_srls_fw_version);
+ drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcp_ucode_feature_version,
+ adev->gfx.rlcp_ucode_version);
+ drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcv_ucode_feature_version,
+ adev->gfx.rlcv_ucode_version);
+ drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec_feature_version, adev->gfx.mec_fw_version);
+
+ if (adev->gfx.mec2_fw)
+ drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec2_feature_version,
+ adev->gfx.mec2_fw_version);
+
+ drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gfx.imu_fw_version);
+ drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
+ adev->psp.sos.feature_version, adev->psp.sos.fw_version);
+ drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
+ adev->psp.asd_context.bin_desc.feature_version,
+ adev->psp.asd_context.bin_desc.fw_version);
+
+ drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.xgmi_context.context.bin_desc.feature_version,
+ adev->psp.xgmi_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.ras_context.context.bin_desc.feature_version,
+ adev->psp.ras_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.hdcp_context.context.bin_desc.feature_version,
+ adev->psp.hdcp_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.dtm_context.context.bin_desc.feature_version,
+ adev->psp.dtm_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.rap_context.context.bin_desc.feature_version,
+ adev->psp.rap_context.context.bin_desc.fw_version);
+ drm_printf(p,
+ "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.securedisplay_context.context.bin_desc.feature_version,
+ adev->psp.securedisplay_context.context.bin_desc.fw_version);
+
+ /* SMC firmware */
+ version = adev->pm.fw_version;
+
+ smu_program = (version >> 24) & 0xff;
+ smu_major = (version >> 16) & 0xff;
+ smu_minor = (version >> 8) & 0xff;
+ smu_debug = (version >> 0) & 0xff;
+ drm_printf(p,
+ "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
+ 0, smu_program, version, smu_major, smu_minor, smu_debug);
+
+ /* SDMA firmware */
+ for (int i = 0; i < adev->sdma.num_instances; i++) {
+ drm_printf(p,
+ "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+ i, adev->sdma.instance[i].feature_version,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0,
+ adev->vcn.fw_version);
+ drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcu_fw_version);
+ drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcub_fw_version);
+ drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
+ adev->psp.toc.feature_version, adev->psp.toc.fw_version);
+
+ version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
+ feature, version);
+
+ version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature,
+ version);
+
+ drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
+ adev->vpe.feature_version, adev->vpe.fw_version);
+
+ drm_printf(p, "\nVBIOS Information\n");
+ drm_printf(p, "vbios name : %s\n", ctx->name);
+ drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn);
+ drm_printf(p, "vbios version : %d\n", ctx->version);
+ drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str);
+ drm_printf(p, "vbios date : %s\n", ctx->date);
+}
+
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen)
+{
+ struct drm_printer p;
+ struct amdgpu_coredump_info *coredump = data;
+ struct drm_print_iterator iter;
+ struct amdgpu_vm_fault_info *fault_info;
+ struct amdgpu_ip_block *ip_block;
+ int ver;
+
+ iter.data = buffer;
+ iter.offset = 0;
+ iter.start = offset;
+ iter.remain = count;
+
+ p = drm_coredump_printer(&iter);
+
+ drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
+ drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
+ drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+ drm_printf(&p, "time: %ptSp\n", &coredump->reset_time);
+
+ if (coredump->reset_task_info.task.pid)
+ drm_printf(&p, "process_name: %s PID: %d\n",
+ coredump->reset_task_info.process_name,
+ coredump->reset_task_info.task.pid);
+
+ /* SOC Information */
+ drm_printf(&p, "\nSOC Information\n");
+ drm_printf(&p, "SOC Device id: %d\n", coredump->adev->pdev->device);
+ drm_printf(&p, "SOC PCI Revision id: %d\n", coredump->adev->pdev->revision);
+ drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+ drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+ drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id);
+
+ /* Memory Information */
+ drm_printf(&p, "\nSOC Memory Information\n");
+ drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size);
+ drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size);
+ drm_printf(&p, "gtt size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size);
+
+ /* GDS Config */
+ drm_printf(&p, "\nGDS Config\n");
+ drm_printf(&p, "gds: total size: %d\n", coredump->adev->gds.gds_size);
+ drm_printf(&p, "gds: compute partition size: %d\n", coredump->adev->gds.gds_size);
+ drm_printf(&p, "gds: gws per compute partition: %d\n", coredump->adev->gds.gws_size);
+ drm_printf(&p, "gds: os per compute partition: %d\n", coredump->adev->gds.oa_size);
+
+ /* HWIP Version Information */
+ drm_printf(&p, "\nHW IP Version Information\n");
+ for (int i = 1; i < MAX_HWIP; i++) {
+ for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
+ ver = coredump->adev->ip_versions[i][j];
+ if (ver)
+ drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n",
+ hw_ip_names[i], i, j,
+ IP_VERSION_MAJ(ver),
+ IP_VERSION_MIN(ver),
+ IP_VERSION_REV(ver),
+ IP_VERSION_VARIANT(ver),
+ IP_VERSION_SUBREV(ver));
+ }
+ }
+
+ /* IP firmware information */
+ drm_printf(&p, "\nIP Firmwares\n");
+ amdgpu_devcoredump_fw_info(coredump->adev, &p);
+
+ if (coredump->ring) {
+ drm_printf(&p, "\nRing timed out details\n");
+ drm_printf(&p, "IP Type: %d Ring Name: %s\n",
+ coredump->ring->funcs->type,
+ coredump->ring->name);
+ }
+
+ /* Add page fault information */
+ fault_info = &coredump->adev->vm_manager.fault_info;
+ drm_printf(&p, "\n[%s] Page fault observed\n",
+ fault_info->vmhub ? "mmhub" : "gfxhub");
+ drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr);
+ drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status);
+
+ /* dump the ip state for each ip */
+ drm_printf(&p, "IP Dump\n");
+ for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
+ ip_block = &coredump->adev->ip_blocks[i];
+ if (ip_block->version->funcs->print_ip_state) {
+ drm_printf(&p, "IP: %s\n", ip_block->version->funcs->name);
+ ip_block->version->funcs->print_ip_state(ip_block, &p);
+ drm_printf(&p, "\n");
+ }
+ }
+
+ /* Add ring buffer information */
+ drm_printf(&p, "Ring buffer information\n");
+ for (int i = 0; i < coredump->adev->num_rings; i++) {
+ int j = 0;
+ struct amdgpu_ring *ring = coredump->adev->rings[i];
+
+ drm_printf(&p, "ring name: %s\n", ring->name);
+ drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
+ amdgpu_ring_get_rptr(ring),
+ amdgpu_ring_get_wptr(ring),
+ ring->buf_mask);
+ drm_printf(&p, "Ring size in dwords: %d\n",
+ ring->ring_size / 4);
+ drm_printf(&p, "Ring contents\n");
+ drm_printf(&p, "Offset \t Value\n");
+
+ while (j < ring->ring_size) {
+ drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]);
+ j += 4;
+ }
+ }
+
+ if (coredump->skip_vram_check)
+ drm_printf(&p, "VRAM lost check is skipped!\n");
+ else if (coredump->reset_vram_lost)
+ drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+
+ return count - iter.remain;
+}
+
+static void amdgpu_devcoredump_free(void *data)
+{
+ kfree(data);
+}
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct amdgpu_coredump_info *coredump;
+ struct drm_sched_job *s_job;
+
+ coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
+
+ if (!coredump) {
+ DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
+ return;
+ }
+
+ coredump->skip_vram_check = skip_vram_check;
+ coredump->reset_vram_lost = vram_lost;
+
+ if (job && job->pasid) {
+ struct amdgpu_task_info *ti;
+
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+ if (ti) {
+ coredump->reset_task_info = *ti;
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+
+ if (job) {
+ s_job = &job->base;
+ coredump->ring = to_amdgpu_ring(s_job->sched);
+ }
+
+ coredump->adev = adev;
+
+ ktime_get_ts64(&coredump->reset_time);
+
+ dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+ amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+ drm_info(dev, "AMDGPU device coredump file has been created\n");
+ drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+ dev->primary->index);
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
new file mode 100644
index 000000000000..ef9772c6bcc9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DEV_COREDUMP_H__
+#define __AMDGPU_DEV_COREDUMP_H__
+
+#include "amdgpu.h"
+
+#ifdef CONFIG_DEV_COREDUMP
+
+#define AMDGPU_COREDUMP_VERSION "1"
+
+struct amdgpu_coredump_info {
+ struct amdgpu_device *adev;
+ struct amdgpu_task_info reset_task_info;
+ struct timespec64 reset_time;
+ bool skip_vram_check;
+ bool reset_vram_lost;
+ struct amdgpu_ring *ring;
+};
+#endif
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6447cd6ca5a8..58c3ffe707d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,15 +25,24 @@
* Alex Deucher
* Jerome Glisse
*/
+
+#include <linux/aperture.h>
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/apple-gmux.h>
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client_event.h>
+#include <drm/drm_crtc_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
+#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
@@ -54,7 +63,6 @@
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
-#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"
@@ -63,26 +71,44 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_virt.h"
+#include "amdgpu_dev_coredump.h"
#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>
+#include <drm/drm_drv.h>
+
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
+#endif
+
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
+#define AMDGPU_MAX_RETRY_LIMIT 2
+#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
+#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
+#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
+#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
+
+#define AMDGPU_VBIOS_SKIP (1U << 0)
+#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
+
+static const struct drm_driver amdgpu_kms_driver;
const char *amdgpu_asic_name[] = {
"TAHITI",
@@ -110,23 +136,91 @@ const char *amdgpu_asic_name[] = {
"RAVEN",
"ARCTURUS",
"RENOIR",
+ "ALDEBARAN",
"NAVI10",
+ "CYAN_SKILLFISH",
"NAVI14",
"NAVI12",
"SIENNA_CICHLID",
"NAVY_FLOUNDER",
"VANGOGH",
"DIMGREY_CAVEFISH",
+ "BEIGE_GOBY",
+ "YELLOW_CARP",
+ "IP DISCOVERY",
"LAST",
};
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
+/*
+ * Default init level where all blocks are expected to be initialized. This is
+ * the level of initialization expected by default and also after a full reset
+ * of the device.
+ */
+struct amdgpu_init_level amdgpu_init_default = {
+ .level = AMDGPU_INIT_LEVEL_DEFAULT,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+struct amdgpu_init_level amdgpu_init_recovery = {
+ .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+/*
+ * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
+ * is used for cases like reset on initialization where the entire hive needs to
+ * be reset before first use.
+ */
+struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
+ .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ .hwini_ip_block_mask =
+ BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
+ BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
+ BIT(AMD_IP_BLOCK_TYPE_PSP)
+};
+
+static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
+static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
+
+static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
+
+static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
+ enum amd_ip_block_type block)
+{
+ return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
+}
+
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl)
+{
+ switch (lvl) {
+ case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
+ adev->init_lvl = &amdgpu_init_minimal_xgmi;
+ break;
+ case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
+ adev->init_lvl = &amdgpu_init_recovery;
+ break;
+ case AMDGPU_INIT_LEVEL_DEFAULT:
+ fallthrough;
+ default:
+ adev->init_lvl = &amdgpu_init_default;
+ break;
+ }
+}
+
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data);
+
/**
* DOC: pcie_replay_count
*
* The amdgpu driver provides a sysfs API for reporting the total number
- * of PCIe replays (NAKs)
+ * of PCIe replays (NAKs).
* The file pcie_replay_count is used for this and returns the total
- * number of replays as a sum of the NAKs generated and NAKs received
+ * number of replays as a sum of the NAKs generated and NAKs received.
*/
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
@@ -136,93 +230,206 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
- return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
+ return sysfs_emit(buf, "%llu\n", cnt);
}
-static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
+static DEVICE_ATTR(pcie_replay_count, 0444,
amdgpu_device_get_pcie_replay_count, NULL);
-static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
-/**
- * DOC: product_name
- *
- * The amdgpu driver provides a sysfs API for reporting the product name
- * for the device
- * The file serial_number is used for this and returns the product name
- * as returned from the FRU.
- * NOTE: This is only available for certain server cards
- */
+ if (amdgpu_nbio_is_replay_cnt_supported(adev))
+ ret = sysfs_create_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
-static ssize_t amdgpu_device_get_product_name(struct device *dev,
- struct device_attribute *attr, char *buf)
+ return ret;
+}
+
+static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_nbio_is_replay_cnt_supported(adev))
+ sysfs_remove_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
+}
+
+static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
+ const struct bin_attribute *attr, char *buf,
+ loff_t ppos, size_t count)
{
+ struct device *dev = kobj_to_dev(kobj);
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ ssize_t bytes_read;
+
+ switch (ppos) {
+ case AMDGPU_SYS_REG_STATE_XGMI:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_WAFL:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_PCIE:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR_1:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return bytes_read;
+}
+
+static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+ AMDGPU_SYS_REG_STATE_END);
+
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!amdgpu_asic_get_reg_state_supported(adev))
+ return 0;
+
+ ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+
+ return ret;
+}
+
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_asic_get_reg_state_supported(adev))
+ return;
+ sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->suspend) {
+ r = ip_block->version->funcs->suspend(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "suspend of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
- return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
+ ip_block->status.hw = false;
+ return 0;
}
-static DEVICE_ATTR(product_name, S_IRUGO,
- amdgpu_device_get_product_name, NULL);
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->resume) {
+ r = ip_block->version->funcs->resume(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = true;
+ return 0;
+}
/**
- * DOC: product_number
+ * DOC: board_info
+ *
+ * The amdgpu driver provides a sysfs API for giving board related information.
+ * It provides the form factor information in the format
+ *
+ * type : form factor
+ *
+ * Possible form factor values
+ *
+ * - "cem" - PCIE CEM card
+ * - "oam" - Open Compute Accelerator Module
+ * - "unknown" - Not known
*
- * The amdgpu driver provides a sysfs API for reporting the part number
- * for the device
- * The file serial_number is used for this and returns the part number
- * as returned from the FRU.
- * NOTE: This is only available for certain server cards
*/
-static ssize_t amdgpu_device_get_product_number(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t amdgpu_device_get_board_info(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
+ const char *pkg;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
+ pkg_type = adev->smuio.funcs->get_pkg_type(adev);
- return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
+ switch (pkg_type) {
+ case AMDGPU_PKG_TYPE_CEM:
+ pkg = "cem";
+ break;
+ case AMDGPU_PKG_TYPE_OAM:
+ pkg = "oam";
+ break;
+ default:
+ pkg = "unknown";
+ break;
+ }
+
+ return sysfs_emit(buf, "%s : %s\n", "type", pkg);
}
-static DEVICE_ATTR(product_number, S_IRUGO,
- amdgpu_device_get_product_number, NULL);
+static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
-/**
- * DOC: serial_number
- *
- * The amdgpu driver provides a sysfs API for reporting the serial number
- * for the device
- * The file serial_number is used for this and returns the serial number
- * as returned from the FRU.
- * NOTE: This is only available for certain server cards
- */
+static struct attribute *amdgpu_board_attrs[] = {
+ &dev_attr_board_info.attr,
+ NULL,
+};
-static ssize_t amdgpu_device_get_serial_number(struct device *dev,
- struct device_attribute *attr, char *buf)
+static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
{
+ struct device *dev = kobj_to_dev(kobj);
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ return attr->mode;
}
-static DEVICE_ATTR(serial_number, S_IRUGO,
- amdgpu_device_get_serial_number, NULL);
+static const struct attribute_group amdgpu_board_attrs_group = {
+ .attrs = amdgpu_board_attrs,
+ .is_visible = amdgpu_board_attrs_is_visible
+};
+
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
- * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
+ * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
- * Returns true if the device is a dGPU with HG/PX power control,
+ * Returns true if the device is a dGPU with ATPX power control,
* otherwise return false.
*/
-bool amdgpu_device_supports_atpx(struct drm_device *dev)
+bool amdgpu_device_supports_px(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- if (adev->flags & AMD_IS_PX)
+ if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
return true;
return false;
}
@@ -230,16 +437,18 @@ bool amdgpu_device_supports_atpx(struct drm_device *dev)
/**
* amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
- * Returns true if the device is a dGPU with HG/PX power control,
+ * Returns true if the device is a dGPU with ACPI power control,
* otherwise return false.
*/
-bool amdgpu_device_supports_boco(struct drm_device *dev)
+bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
+ if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ return false;
- if (adev->has_pr3)
+ if (adev->has_pr3 ||
+ ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
return true;
return false;
}
@@ -247,24 +456,114 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
/**
* amdgpu_device_supports_baco - Does the device support BACO
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
- * Returns true if the device supporte BACO,
- * otherwise return false.
+ * Return:
+ * 1 if the device supports BACO;
+ * 3 if the device supports MACO (only works if BACO is supported)
+ * otherwise return 0.
*/
-bool amdgpu_device_supports_baco(struct drm_device *dev)
+int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
-
return amdgpu_asic_supports_baco(adev);
}
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
+{
+ int bamaco_support;
+
+ adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
+ bamaco_support = amdgpu_device_supports_baco(adev);
+
+ switch (amdgpu_runtime_pm) {
+ case 2:
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
+ } else if (bamaco_support == BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
+ }
+ break;
+ case 1:
+ if (bamaco_support & BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Forcing BACO for runtime pm\n");
+ }
+ break;
+ case -1:
+ case -2:
+ if (amdgpu_device_supports_px(adev)) {
+ /* enable PX as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
+ dev_info(adev->dev, "Using ATPX for runtime pm\n");
+ } else if (amdgpu_device_supports_boco(adev)) {
+ /* enable boco as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
+ dev_info(adev->dev, "Using BOCO for runtime pm\n");
+ } else {
+ if (!bamaco_support)
+ goto no_runtime_pm;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ /* BACO are not supported on vega20 and arctrus */
+ break;
+ case CHIP_VEGA10:
+ /* enable BACO as runpm mode if noretry=0 */
+ if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ default:
+ /* enable BACO as runpm mode on CI+ */
+ if (!amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ }
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Using BAMACO for runtime pm\n");
+ } else {
+ dev_info(adev->dev, "Using BACO for runtime pm\n");
+ }
+ }
+ }
+ break;
+ case 0:
+ dev_info(adev->dev, "runtime pm is manually disabled\n");
+ break;
+ default:
+ break;
+ }
+
+no_runtime_pm:
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
+ dev_info(adev->dev, "Runtime PM not available\n");
+}
+/**
+ * amdgpu_device_supports_smart_shift - Is the device dGPU with
+ * smart shift support
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true if the device is a dGPU with Smart Shift support,
+ * otherwise returns false.
+ */
+bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
+{
+ return (amdgpu_device_supports_boco(adev) &&
+ amdgpu_acpi_is_power_shift_control_supported());
+}
+
/*
* VRAM access helper functions
*/
/**
- * amdgpu_device_vram_access - read/write a buffer in vram
+ * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
*
* @adev: amdgpu_device pointer
* @pos: offset of the buffer in vram
@@ -272,59 +571,147 @@ bool amdgpu_device_supports_baco(struct drm_device *dev)
* @size: read/write size, sizeof(@buf) must > @size
* @write: true - write to vram, otherwise - read from vram
*/
-void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
- uint32_t *buf, size_t size, bool write)
+void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
{
unsigned long flags;
- uint32_t hi = ~0;
+ uint32_t hi = ~0, tmp = 0;
+ uint32_t *data = buf;
uint64_t last;
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+ BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
+
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+ for (last = pos + size; pos < last; pos += 4) {
+ tmp = pos >> 31;
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+ if (tmp != hi) {
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ hi = tmp;
+ }
+ if (write)
+ WREG32_NO_KIQ(mmMM_DATA, *data++);
+ else
+ *data++ = RREG32_NO_KIQ(mmMM_DATA);
+ }
+
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_device_aper_access - access vram by vram aperture
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ *
+ * The return value means how many bytes have been transferred.
+ */
+size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
#ifdef CONFIG_64BIT
+ void __iomem *addr;
+ size_t count = 0;
+ uint64_t last;
+
+ if (!adev->mman.aper_base_kaddr)
+ return 0;
+
last = min(pos + size, adev->gmc.visible_vram_size);
if (last > pos) {
- void __iomem *addr = adev->mman.aper_base_kaddr + pos;
- size_t count = last - pos;
+ addr = adev->mman.aper_base_kaddr + pos;
+ count = last - pos;
if (write) {
memcpy_toio(addr, buf, count);
+ /* Make sure HDP write cache flush happens without any reordering
+ * after the system memory contents are sent over PCIe device
+ */
mb();
- amdgpu_asic_flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
} else {
- amdgpu_asic_invalidate_hdp(adev, NULL);
+ amdgpu_device_invalidate_hdp(adev, NULL);
+ /* Make sure HDP read cache is invalidated before issuing a read
+ * to the PCIe device
+ */
mb();
memcpy_fromio(buf, addr, count);
}
- if (count == size)
- return;
-
- pos += count;
- buf += count / 4;
- size -= count;
}
+
+ return count;
+#else
+ return 0;
#endif
+}
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- for (last = pos + size; pos < last; pos += 4) {
- uint32_t tmp = pos >> 31;
+/**
+ * amdgpu_device_vram_access - read/write a buffer in vram
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ size_t count;
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
- if (tmp != hi) {
- WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
- hi = tmp;
- }
- if (write)
- WREG32_NO_KIQ(mmMM_DATA, *buf++);
- else
- *buf++ = RREG32_NO_KIQ(mmMM_DATA);
+ /* try to using vram apreature to access vram first */
+ count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+ size -= count;
+ if (size) {
+ /* using MM to access rest vram */
+ pos += count;
+ buf += count;
+ amdgpu_device_mm_access(adev, pos, buf, size, write);
}
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}
/*
* register access helper functions.
*/
+
+/* Check if hw access should be skipped because of hotplug or device error */
+bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
+{
+ if (adev->no_hw_access)
+ return true;
+
+#ifdef CONFIG_LOCKDEP
+ /*
+ * This is a bit complicated to understand, so worth a comment. What we assert
+ * here is that the GPU reset is not running on another thread in parallel.
+ *
+ * For this we trylock the read side of the reset semaphore, if that succeeds
+ * we know that the reset is not running in parallel.
+ *
+ * If the trylock fails we assert that we are either already holding the read
+ * side of the lock or are the reset thread itself and hold the write side of
+ * the lock.
+ */
+ if (in_task()) {
+ if (down_read_trylock(&adev->reset_domain->sem))
+ up_read(&adev->reset_domain->sem);
+ else
+ lockdep_assert_held(&adev->reset_domain->sem);
+ }
+#endif
+ return false;
+}
+
/**
* amdgpu_device_rreg - read a memory mapped IO or indirect register
*
@@ -339,15 +726,15 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
{
uint32_t ret;
- if (adev->in_pci_err_recovery)
+ if (amdgpu_device_skip_hw_access(adev))
return 0;
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_sem)) {
- ret = amdgpu_kiq_rreg(adev, reg);
- up_read(&adev->reset_sem);
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg, 0);
+ up_read(&adev->reset_domain->sem);
} else {
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -363,8 +750,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
/*
* MMIO register read with bytes helper functions
* @offset:bytes offset from MMIO start
- *
-*/
+ */
/**
* amdgpu_mm_rreg8 - read a memory mapped IO register
@@ -376,7 +762,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
*/
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
- if (adev->in_pci_err_recovery)
+ if (amdgpu_device_skip_hw_access(adev))
return 0;
if (offset < adev->rmmio_size)
@@ -384,12 +770,55 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
BUG();
}
+
+/**
+ * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags,
+ uint32_t xcc_id)
+{
+ uint32_t ret, rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, false,
+ &rlcg_flag)) {
+ ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ ret = adev->pcie_rreg(adev, reg * 4);
+ }
+
+ return ret;
+}
+
/*
* MMIO register write with bytes helper functions
* @offset:bytes offset from MMIO start
* @value: the value want to be written to the register
- *
-*/
+ */
+
/**
* amdgpu_mm_wreg8 - read a memory mapped IO register
*
@@ -401,7 +830,7 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
*/
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
- if (adev->in_pci_err_recovery)
+ if (amdgpu_device_skip_hw_access(adev))
return;
if (offset < adev->rmmio_size)
@@ -424,15 +853,15 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags)
{
- if (adev->in_pci_err_recovery)
+ if (amdgpu_device_skip_hw_access(adev))
return;
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_sem)) {
- amdgpu_kiq_wreg(adev, reg, v);
- up_read(&adev->reset_sem);
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, 0);
+ up_read(&adev->reset_domain->sem);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -443,220 +872,240 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}
-/*
- * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
+/**
+ * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
*
- * this function is invoked only the debugfs register access
- * */
+ * @adev: amdgpu_device pointer
+ * @reg: mmio/rlc register
+ * @v: value to write
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * this function is invoked only for the debugfs register access
+ */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v)
+ uint32_t reg, uint32_t v,
+ uint32_t xcc_id)
{
- if (adev->in_pci_err_recovery)
+ if (amdgpu_device_skip_hw_access(adev))
return;
if (amdgpu_sriov_fullaccess(adev) &&
adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) {
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
- return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
+ return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
+ } else if ((reg * 4) >= adev->rmmio_size) {
+ adev->pcie_wreg(adev, reg * 4, v);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
}
/**
- * amdgpu_io_rreg - read an IO register
- *
- * @adev: amdgpu_device pointer
- * @reg: dword aligned register offset
- *
- * Returns the 32 bit value from the offset specified.
- */
-u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
-{
- if (adev->in_pci_err_recovery)
- return 0;
-
- if ((reg * 4) < adev->rio_mem_size)
- return ioread32(adev->rio_mem + (reg * 4));
- else {
- iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
- return ioread32(adev->rio_mem + (mmMM_DATA * 4));
- }
-}
-
-/**
- * amdgpu_io_wreg - write to an IO register
+ * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
*
* @adev: amdgpu_device pointer
* @reg: dword aligned register offset
* @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
*
* Writes the value specified to the offset specified.
*/
-void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags, uint32_t xcc_id)
{
- if (adev->in_pci_err_recovery)
+ uint32_t rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
return;
- if ((reg * 4) < adev->rio_mem_size)
- iowrite32(v, adev->rio_mem + (reg * 4));
- else {
- iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
- iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, true,
+ &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ adev->pcie_wreg(adev, reg * 4, v);
}
}
/**
- * amdgpu_mm_rdoorbell - read a doorbell dword
+ * amdgpu_device_indirect_rreg - read an indirect register
*
* @adev: amdgpu_device pointer
- * @index: doorbell index
+ * @reg_addr: indirect register address to read from
*
- * Returns the value in the doorbell aperture at the
- * requested doorbell index (CIK).
+ * Returns the value of indirect register @reg_addr
*/
-u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
+u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
+ u32 reg_addr)
{
- if (adev->in_pci_err_recovery)
- return 0;
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+ u32 r;
- if (index < adev->doorbell.num_doorbells) {
- return readl(adev->doorbell.ptr + index);
- } else {
- DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
- return 0;
- }
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
}
-/**
- * amdgpu_mm_wdoorbell - write a doorbell dword
- *
- * @adev: amdgpu_device pointer
- * @index: doorbell index
- * @v: value to write
- *
- * Writes @v to the doorbell aperture at the
- * requested doorbell index (CIK).
- */
-void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
{
- if (adev->in_pci_err_recovery)
- return;
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ u32 r;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
- if (index < adev->doorbell.num_doorbells) {
- writel(v, adev->doorbell.ptr + index);
+ if (unlikely(!adev->nbio.funcs)) {
+ pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
+ pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
} else {
- DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
}
-}
-/**
- * amdgpu_mm_rdoorbell64 - read a doorbell Qword
- *
- * @adev: amdgpu_device pointer
- * @index: doorbell index
- *
- * Returns the value in the doorbell aperture at the
- * requested doorbell index (VEGA10+).
- */
-u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
-{
- if (adev->in_pci_err_recovery)
- return 0;
-
- if (index < adev->doorbell.num_doorbells) {
- return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
+ if (reg_addr >> 32) {
+ if (unlikely(!adev->nbio.funcs))
+ pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
+ else
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
} else {
- DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
- return 0;
+ pcie_index_hi = 0;
}
-}
-/**
- * amdgpu_mm_wdoorbell64 - write a doorbell Qword
- *
- * @adev: amdgpu_device pointer
- * @index: doorbell index
- * @v: value to write
- *
- * Writes @v to the doorbell aperture at the
- * requested doorbell index (VEGA10+).
- */
-void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
-{
- if (adev->in_pci_err_recovery)
- return;
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
- if (index < adev->doorbell.num_doorbells) {
- atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
- } else {
- DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
}
+ r = readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
}
/**
- * amdgpu_device_indirect_rreg - read an indirect register
+ * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
*
* @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
* @reg_addr: indirect register address to read from
*
* Returns the value of indirect register @reg_addr
*/
-u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
- u32 reg_addr)
+u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
+ u32 reg_addr)
{
- unsigned long flags;
- u32 r;
+ unsigned long flags, pcie_index, pcie_data;
void __iomem *pcie_index_offset;
void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ /* read low 32 bits */
writel(reg_addr, pcie_index_offset);
readl(pcie_index_offset);
r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ r |= ((u64)readl(pcie_data_offset) << 32);
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
return r;
}
-/**
- * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
- *
- * @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
- * @reg_addr: indirect register address to read from
- *
- * Returns the value of indirect register @reg_addr
- */
-u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
- u32 reg_addr)
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
{
- unsigned long flags;
- u64 r;
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
/* read low 32 bits */
writel(reg_addr, pcie_index_offset);
readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
r = readl(pcie_data_offset);
/* read high 32 bits */
writel(reg_addr + 4, pcie_index_offset);
readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
r |= ((u64)readl(pcie_data_offset) << 32);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
return r;
@@ -666,28 +1115,68 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
* amdgpu_device_indirect_wreg - write an indirect register address
*
* @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
* @reg_addr: indirect register offset
* @reg_data: indirect register data
*
*/
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr, u32 reg_data)
{
- unsigned long flags;
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ else
+ pcie_index_hi = 0;
+
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
writel(reg_addr, pcie_index_offset);
readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
writel(reg_data, pcie_data_offset);
readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
@@ -695,20 +1184,20 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
* amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
*
* @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
* @reg_addr: indirect register offset
* @reg_data: indirect register data
*
*/
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr, u64 reg_data)
{
- unsigned long flags;
+ unsigned long flags, pcie_index, pcie_data;
void __iomem *pcie_index_offset;
void __iomem *pcie_data_offset;
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
@@ -726,6 +1215,67 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_get_rev_id - query device rev_id
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return device rev_id
+ */
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_rev_id(adev);
+}
+
/**
* amdgpu_invalid_rreg - dummy reg read function
*
@@ -738,7 +1288,14 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
*/
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
+ dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
+ BUG();
+ return 0;
+}
+
+static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
BUG();
return 0;
}
@@ -755,8 +1312,17 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
*/
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
- DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
- reg, v);
+ dev_err(adev->dev,
+ "Invalid callback to write register 0x%04X with 0x%08X\n", reg,
+ v);
+ BUG();
+}
+
+static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write register 0x%llX with 0x%08X\n", reg,
+ v);
BUG();
}
@@ -772,7 +1338,15 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
*/
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
+ dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
+ reg);
+ BUG();
+ return 0;
+}
+
+static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
BUG();
return 0;
}
@@ -789,8 +1363,17 @@ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
*/
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
- DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
- reg, v);
+ dev_err(adev->dev,
+ "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
+static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
+ reg, v);
BUG();
}
@@ -808,8 +1391,9 @@ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
uint32_t block, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
- reg, block);
+ dev_err(adev->dev,
+ "Invalid callback to read register 0x%04X in block 0x%04X\n",
+ reg, block);
BUG();
return 0;
}
@@ -829,11 +1413,23 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
uint32_t block,
uint32_t reg, uint32_t v)
{
- DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
- reg, block, v);
+ dev_err(adev->dev,
+ "Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
+ reg, block, v);
BUG();
}
+static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
+{
+ if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
+ return AMDGPU_VBIOS_SKIP;
+
+ if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
+ return AMDGPU_VBIOS_OPTIONAL;
+
+ return 0;
+}
+
/**
* amdgpu_device_asic_init - Wrapper for atom asic_init
*
@@ -843,38 +1439,62 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
*/
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
+ uint32_t flags;
+ bool optional;
+ int ret;
+
amdgpu_asic_pre_asic_init(adev);
+ flags = amdgpu_device_get_vbios_flags(adev);
+ optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
+ amdgpu_psp_wait_for_bootloader(adev);
+ if (optional && !adev->bios)
+ return 0;
- return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ ret = amdgpu_atomfirmware_asic_init(adev, true);
+ return ret;
+ } else {
+ if (optional && !adev->bios)
+ return 0;
+
+ return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ }
+
+ return 0;
}
/**
- * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
+ * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
*
* @adev: amdgpu_device pointer
*
* Allocates a scratch page of VRAM for use by various things in the
* driver.
*/
-static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
+static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
- return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &adev->vram_scratch.robj,
- &adev->vram_scratch.gpu_addr,
- (void **)&adev->vram_scratch.ptr);
+ return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mem_scratch.robj,
+ &adev->mem_scratch.gpu_addr,
+ (void **)&adev->mem_scratch.ptr);
}
/**
- * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
+ * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
*
* @adev: amdgpu_device pointer
*
* Frees the VRAM scratch page.
*/
-static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
+static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
- amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}
/**
@@ -884,7 +1504,7 @@ static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
* @registers: pointer to the register array
* @array_size: size of the register array
*
- * Programs an array or registers with and and or masks.
+ * Programs an array or registers with and or masks.
* This is a helper for setting golden registers.
*/
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
@@ -897,7 +1517,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
if (array_size % 3)
return;
- for (i = 0; i < array_size; i +=3) {
+ for (i = 0; i < array_size; i += 3) {
reg = registers[i + 0];
and_mask = registers[i + 1];
or_mask = registers[i + 2];
@@ -942,76 +1562,6 @@ int amdgpu_device_pci_reset(struct amdgpu_device *adev)
}
/*
- * GPU doorbell aperture helpers function.
- */
-/**
- * amdgpu_device_doorbell_init - Init doorbell driver information.
- *
- * @adev: amdgpu_device pointer
- *
- * Init doorbell driver information (CIK)
- * Returns 0 on success, error on failure.
- */
-static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
-{
-
- /* No doorbell on SI hardware generation */
- if (adev->asic_type < CHIP_BONAIRE) {
- adev->doorbell.base = 0;
- adev->doorbell.size = 0;
- adev->doorbell.num_doorbells = 0;
- adev->doorbell.ptr = NULL;
- return 0;
- }
-
- if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
- return -EINVAL;
-
- amdgpu_asic_init_doorbell_index(adev);
-
- /* doorbell bar mapping */
- adev->doorbell.base = pci_resource_start(adev->pdev, 2);
- adev->doorbell.size = pci_resource_len(adev->pdev, 2);
-
- adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
- adev->doorbell_index.max_assignment+1);
- if (adev->doorbell.num_doorbells == 0)
- return -EINVAL;
-
- /* For Vega, reserve and map two pages on doorbell BAR since SDMA
- * paging queue doorbell use the second page. The
- * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
- * doorbells are in the first page. So with paging queue enabled,
- * the max num_doorbells should + 1 page (0x400 in dword)
- */
- if (adev->asic_type >= CHIP_VEGA10)
- adev->doorbell.num_doorbells += 0x400;
-
- adev->doorbell.ptr = ioremap(adev->doorbell.base,
- adev->doorbell.num_doorbells *
- sizeof(u32));
- if (adev->doorbell.ptr == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
-/**
- * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down doorbell driver information (CIK)
- */
-static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
-{
- iounmap(adev->doorbell.ptr);
- adev->doorbell.ptr = NULL;
-}
-
-
-
-/*
* amdgpu_device_wb_*()
* Writeback is the method by which the GPU updates special pages in memory
* with the status of certain GPU events (fences, ring pointers,etc.).
@@ -1036,7 +1586,7 @@ static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
+ * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
*
* @adev: amdgpu_device pointer
*
@@ -1080,13 +1630,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
*/
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
- unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
+ unsigned long flags, offset;
+ spin_lock_irqsave(&adev->wb.lock, flags);
+ offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
if (offset < adev->wb.num_wb) {
__set_bit(offset, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
*wb = offset << 3; /* convert to dw offset */
return 0;
} else {
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
return -EINVAL;
}
}
@@ -1101,9 +1655,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
*/
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
+ unsigned long flags;
+
wb >>= 3;
+ spin_lock_irqsave(&adev->wb.lock, flags);
if (wb < adev->wb.num_wb)
__clear_bit(wb, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
}
/**
@@ -1120,14 +1678,33 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
struct pci_bus *root;
struct resource *res;
- unsigned i;
+ int max_size, r;
+ unsigned int i;
u16 cmd;
- int r;
+
+ if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+ return 0;
/* Bypass for VF */
if (amdgpu_sriov_vf(adev))
return 0;
+ if (!amdgpu_rebar)
+ return 0;
+
+ /* resizing on Dell G5 SE platforms causes problems with runtime pm */
+ if ((amdgpu_runtime_pm != 0) &&
+ adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
+ adev->pdev->device == 0x731f &&
+ adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
+ return 0;
+
+ /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
+ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
+ dev_warn(
+ adev->dev,
+ "System can't access extended configuration space, please check!!\n");
+
/* skip if the bios has already enabled large BAR */
if (adev->gmc.real_vram_size &&
(pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
@@ -1149,33 +1726,32 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
return 0;
/* Limit the BAR size to what is available */
- rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
- rbar_size);
+ max_size = pci_rebar_get_max_size(adev->pdev, 0);
+ if (max_size < 0)
+ return 0;
+ rbar_size = min(max_size, rbar_size);
/* Disable memory decoding while we change the BAR addresses and size */
pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
pci_write_config_word(adev->pdev, PCI_COMMAND,
cmd & ~PCI_COMMAND_MEMORY);
- /* Free the VRAM and doorbell BAR, we most likely need to move both. */
- amdgpu_device_doorbell_fini(adev);
- if (adev->asic_type >= CHIP_BONAIRE)
- pci_release_resource(adev->pdev, 2);
+ /* Tear down doorbell as resizing will release BARs */
+ amdgpu_doorbell_fini(adev);
- pci_release_resource(adev->pdev, 0);
-
- r = pci_resize_resource(adev->pdev, 0, rbar_size);
+ r = pci_resize_resource(adev->pdev, 0, rbar_size,
+ (adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
+ : 1 << 2);
if (r == -ENOSPC)
- DRM_INFO("Not enough PCI address space for a large BAR.");
+ dev_info(adev->dev,
+ "Not enough PCI address space for a large BAR.");
else if (r && r != -ENOTSUPP)
- DRM_ERROR("Problem resizing BAR0 (%d).", r);
-
- pci_assign_unassigned_bus_resources(adev->pdev->bus);
+ dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
/* When the doorbell or fb BAR isn't available we have no chance of
* using the device.
*/
- r = amdgpu_device_doorbell_init(adev);
+ r = amdgpu_doorbell_init(adev);
if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
return -ENODEV;
@@ -1198,11 +1774,17 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
*/
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
- uint32_t reg;
+ uint32_t reg, flags;
if (amdgpu_sriov_vf(adev))
return false;
+ flags = amdgpu_device_get_vbios_flags(adev);
+ if (flags & AMDGPU_VBIOS_SKIP)
+ return false;
+ if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
+ return false;
+
if (amdgpu_passthrough(adev)) {
/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
* some old smc fw still need driver do vPost otherwise gpu hang, while
@@ -1212,17 +1794,23 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_FIJI) {
int err;
uint32_t fw_ver;
+
err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
- /* force vPost if error occured */
+ /* force vPost if error occurred */
if (err)
return true;
fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+ release_firmware(adev->pm.fw);
if (fw_ver < 0x00160e00)
return true;
}
}
+ /* Don't post if we need to reset whole hive on init */
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ return false;
+
if (adev->has_hw_reset) {
adev->has_hw_reset = false;
return true;
@@ -1241,19 +1829,140 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
}
+/*
+ * Check whether seamless boot is supported.
+ *
+ * So far we only support seamless boot on DCE 3.0 or later.
+ * If users report that it works on older ASICS as well, we may
+ * loosen this.
+ */
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
+{
+ switch (amdgpu_seamless) {
+ case -1:
+ break;
+ case 1:
+ return true;
+ case 0:
+ return false;
+ default:
+ dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
+ amdgpu_seamless);
+ return false;
+ }
+
+ if (!(adev->flags & AMD_IS_APU))
+ return false;
+
+ if (adev->mman.keep_stolen_vga_memory)
+ return false;
+
+ return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
+}
+
+/*
+ * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
+ * don't support dynamic speed switching. Until we have confirmation from Intel
+ * that a specific host supports it, it's safer that we keep it disabled for all.
+ *
+ * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
+ */
+static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ /* eGPU change speeds based on USB4 fabric conditions */
+ if (dev_is_removable(adev->dev))
+ return true;
+
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ return false;
+#endif
+ return true;
+}
+
+static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
+{
+ /* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
+ * It's unclear if this is a platform-specific or GPU-specific issue.
+ * Disable ASPM on SI for the time being.
+ */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ return true;
+
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
+ return false;
+
+ if (c->x86 == 6 &&
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
+ switch (c->x86_model) {
+ case VFM_MODEL(INTEL_ALDERLAKE):
+ case VFM_MODEL(INTEL_ALDERLAKE_L):
+ case VFM_MODEL(INTEL_RAPTORLAKE):
+ case VFM_MODEL(INTEL_RAPTORLAKE_P):
+ case VFM_MODEL(INTEL_RAPTORLAKE_S):
+ return true;
+ default:
+ return false;
+ }
+ } else {
+ return false;
+ }
+#else
+ return false;
+#endif
+}
+
+/**
+ * amdgpu_device_should_use_aspm - check if the device should program ASPM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Confirm whether the module parameter and pcie bridge agree that ASPM should
+ * be set for this device.
+ *
+ * Returns true if it should be used or false if not.
+ */
+bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
+{
+ switch (amdgpu_aspm) {
+ case -1:
+ break;
+ case 0:
+ return false;
+ case 1:
+ return true;
+ default:
+ return false;
+ }
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (amdgpu_device_aspm_support_quirk(adev))
+ return false;
+ return pcie_aspm_enabled(adev->pdev);
+}
+
/* if we get transitioned to only one device, take VGA back */
/**
* amdgpu_device_vga_set_decode - enable/disable vga decode
*
- * @cookie: amdgpu_device pointer
+ * @pdev: PCI device pointer
* @state: enable/disable vga decode
*
* Enable/disable vga decode (all asics).
* Returns VGA resource flags.
*/
-static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
+static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
+ bool state)
{
- struct amdgpu_device *adev = cookie;
+ struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
+
amdgpu_asic_set_vga_state(adev, state);
if (state)
return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
@@ -1276,7 +1985,8 @@ static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
/* defines number of bits in page table versus page directory,
* a page is 4KB so we have 12 bits offset, minimum 9 bits in the
- * page table and the remaining bits are in the page directory */
+ * page table and the remaining bits are in the page directory
+ */
if (amdgpu_vm_block_size == -1)
return;
@@ -1320,7 +2030,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
return;
if (!is_os_64) {
- DRM_WARN("Not 64-bit OS, feature not supported\n");
+ dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
goto def_value;
}
si_meminfo(&si);
@@ -1335,7 +2045,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
if (total_memory < dram_size_seven_GB)
goto def_value1;
} else {
- DRM_WARN("Smu memory pool size not supported\n");
+ dev_warn(adev->dev, "Smu memory pool size not supported\n");
goto def_value;
}
adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
@@ -1343,11 +2053,48 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
return;
def_value1:
- DRM_WARN("No enough system memory\n");
+ dev_warn(adev->dev, "No enough system memory\n");
def_value:
adev->pm.smu_prv_buffer_size = 0;
}
+static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
+{
+ if (!(adev->flags & AMD_IS_APU) ||
+ adev->asic_type < CHIP_RAVEN)
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ if (adev->pdev->device == 0x15dd)
+ adev->apu_flags |= AMD_APU_IS_RAVEN;
+ if (adev->pdev->device == 0x15d8)
+ adev->apu_flags |= AMD_APU_IS_PICASSO;
+ break;
+ case CHIP_RENOIR:
+ if ((adev->pdev->device == 0x1636) ||
+ (adev->pdev->device == 0x164c))
+ adev->apu_flags |= AMD_APU_IS_RENOIR;
+ else
+ adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
+ break;
+ case CHIP_VANGOGH:
+ adev->apu_flags |= AMD_APU_IS_VANGOGH;
+ break;
+ case CHIP_YELLOW_CARP:
+ break;
+ case CHIP_CYAN_SKILLFISH:
+ if ((adev->pdev->device == 0x13FE) ||
+ (adev->pdev->device == 0x143F))
+ adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_device_check_arguments - validate module params
*
@@ -1358,11 +2105,13 @@ def_value:
*/
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
+ int i;
+
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
amdgpu_sched_jobs = 4;
- } else if (!is_power_of_2(amdgpu_sched_jobs)){
+ } else if (!is_power_of_2(amdgpu_sched_jobs)) {
dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
amdgpu_sched_jobs);
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
@@ -1399,6 +2148,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
}
+ if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
+ dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
+ amdgpu_reset_method = -1;
+ }
+
amdgpu_device_check_smu_prv_buffer_size(adev);
amdgpu_device_check_vm_size(adev);
@@ -1407,9 +2161,31 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
- amdgpu_gmc_tmz_set(adev);
-
- amdgpu_gmc_noretry_set(adev);
+ for (i = 0; i < MAX_XCP; i++) {
+ switch (amdgpu_enforce_isolation) {
+ case -1:
+ case 0:
+ default:
+ /* disable */
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ /* enable */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ /* enable legacy mode */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ /* enable only process isolation without submitting cleaner shader */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
return 0;
}
@@ -1420,7 +2196,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
* @pdev: pci dev pointer
* @state: vga_switcheroo state
*
- * Callback for the switcheroo driver. Suspends or resumes the
+ * Callback for the switcheroo driver. Suspends or resumes
* the asics before or after it is powered up using ACPI methods.
*/
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
@@ -1429,7 +2205,8 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
struct drm_device *dev = pci_get_drvdata(pdev);
int r;
- if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
+ if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
+ state == VGA_SWITCHEROO_OFF)
return;
if (state == VGA_SWITCHEROO_ON) {
@@ -1441,13 +2218,15 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
amdgpu_device_load_pci_state(pdev);
r = pci_enable_device(pdev);
if (r)
- DRM_WARN("pci_enable_device failed (%d)\n", r);
+ dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
+ r);
amdgpu_device_resume(dev, true);
dev->switch_power_state = DRM_SWITCH_POWER_ON;
} else {
- pr_info("switched off\n");
+ dev_info(&pdev->dev, "switched off\n");
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ amdgpu_device_prepare(dev);
amdgpu_device_suspend(dev, true);
amdgpu_device_cache_pci_state(pdev);
/* Shut down the device */
@@ -1470,7 +2249,7 @@ static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
- /*
+ /*
* FIXME: open_count is protected by drm_global_mutex but that would lead to
* locking inversion with the driver load path. And the access here is
* completely racy anyway. So don't bother with locking for now.
@@ -1510,10 +2289,11 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
- DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_clockgating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
return r;
}
@@ -1544,10 +2324,11 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_powergating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
- DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_powergating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
return r;
}
@@ -1564,7 +2345,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
* clockgating is enabled.
*/
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int i;
@@ -1572,7 +2353,8 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
- adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
+ adev->ip_blocks[i].version->funcs->get_clockgating_state(
+ &adev->ip_blocks[i], flags);
}
}
@@ -1594,9 +2376,12 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
- if (r)
- return r;
+ if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
+ r = adev->ip_blocks[i].version->funcs->wait_for_idle(
+ &adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
break;
}
}
@@ -1605,26 +2390,45 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
}
/**
- * amdgpu_device_ip_is_idle - is the hardware IP idle
+ * amdgpu_device_ip_is_hw - is the hardware IP enabled
*
* @adev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
*
- * Check if the hardware IP is idle or not.
- * Returns true if it the IP is idle, false if not.
+ * Check if the hardware IP is enable or not.
+ * Returns true if it the IP is enable, false if not.
*/
-bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type)
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
{
int i;
for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
if (adev->ip_blocks[i].version->type == block_type)
- return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
+ return adev->ip_blocks[i].status.hw;
}
- return true;
+ return false;
+}
+
+/**
+ * amdgpu_device_ip_is_valid - is the hardware IP valid
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is valid or not.
+ * Returns true if it the IP is valid, false if not.
+ */
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == block_type)
+ return adev->ip_blocks[i].status.valid;
+ }
+ return false;
}
@@ -1675,6 +2479,34 @@ int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
return 1;
}
+static const char *ip_block_names[] = {
+ [AMD_IP_BLOCK_TYPE_COMMON] = "common",
+ [AMD_IP_BLOCK_TYPE_GMC] = "gmc",
+ [AMD_IP_BLOCK_TYPE_IH] = "ih",
+ [AMD_IP_BLOCK_TYPE_SMC] = "smu",
+ [AMD_IP_BLOCK_TYPE_PSP] = "psp",
+ [AMD_IP_BLOCK_TYPE_DCE] = "dce",
+ [AMD_IP_BLOCK_TYPE_GFX] = "gfx",
+ [AMD_IP_BLOCK_TYPE_SDMA] = "sdma",
+ [AMD_IP_BLOCK_TYPE_UVD] = "uvd",
+ [AMD_IP_BLOCK_TYPE_VCE] = "vce",
+ [AMD_IP_BLOCK_TYPE_ACP] = "acp",
+ [AMD_IP_BLOCK_TYPE_VCN] = "vcn",
+ [AMD_IP_BLOCK_TYPE_MES] = "mes",
+ [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg",
+ [AMD_IP_BLOCK_TYPE_VPE] = "vpe",
+ [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
+ [AMD_IP_BLOCK_TYPE_ISP] = "isp",
+ [AMD_IP_BLOCK_TYPE_RAS] = "ras",
+};
+
+static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type)
+{
+ int idx = (int)type;
+
+ return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : "unknown";
+}
+
/**
* amdgpu_device_ip_block_add
*
@@ -1690,8 +2522,28 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
if (!ip_block_version)
return -EINVAL;
- DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
- ip_block_version->funcs->name);
+ switch (ip_block_version->type) {
+ case AMD_IP_BLOCK_TYPE_VCN:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+ return 0;
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
+ return 0;
+ break;
+ default:
+ break;
+ }
+
+ dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n",
+ adev->num_ip_blocks,
+ ip_block_name(adev, ip_block_version->type),
+ ip_block_version->major,
+ ip_block_version->minor,
+ ip_block_version->rev,
+ ip_block_version->funcs->name);
+
+ adev->ip_blocks[adev->num_ip_blocks].adev = adev;
adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
@@ -1707,7 +2559,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
* the module parameter virtual_display. This feature provides a virtual
* display hardware on headless boards or in virtualized environments.
* This function parses and validates the configuration string specified by
- * the user and configues the virtual display configuration (number of
+ * the user and configures the virtual display configuration (number of
* virtual connectors, crtcs, etc.) specified.
*/
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
@@ -1746,73 +2598,46 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
}
}
- DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
- amdgpu_virtual_display, pci_address_name,
- adev->enable_virtual_display, adev->mode_info.num_crtc);
+ dev_info(
+ adev->dev,
+ "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
+ amdgpu_virtual_display, pci_address_name,
+ adev->enable_virtual_display, adev->mode_info.num_crtc);
kfree(pciaddstr);
}
}
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
+ adev->mode_info.num_crtc = 1;
+ adev->enable_virtual_display = true;
+ dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
+ adev->enable_virtual_display,
+ adev->mode_info.num_crtc);
+ }
+}
+
/**
* amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
*
* @adev: amdgpu_device pointer
*
* Parses the asic configuration parameters specified in the gpu info
- * firmware and makes them availale to the driver for use in configuring
+ * firmware and makes them available to the driver for use in configuring
* the asic.
* Returns 0 on success, -EINVAL on failure.
*/
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[40];
int err;
const struct gpu_info_firmware_header_v1_0 *hdr;
adev->firmware.gpu_info_fw = NULL;
- if (adev->mman.discovery_bin) {
- amdgpu_discovery_get_gfx_info(adev);
-
- /*
- * FIXME: The bounding box is still needed by Navi12, so
- * temporarily read it from gpu_info firmware. Should be droped
- * when DAL no longer needs it.
- */
- if (adev->asic_type != CHIP_NAVI12)
- return 0;
- }
-
switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_SI
- case CHIP_VERDE:
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_OLAND:
- case CHIP_HAINAN:
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KAVERI:
- case CHIP_KABINI:
- case CHIP_MULLINS:
-#endif
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_VEGA20:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
default:
return 0;
case CHIP_VEGA10:
@@ -1832,39 +2657,25 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_ARCTURUS:
chip_name = "arcturus";
break;
- case CHIP_RENOIR:
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- chip_name = "renoir";
- else
- chip_name = "green_sardine";
- break;
- case CHIP_NAVI10:
- chip_name = "navi10";
- break;
- case CHIP_NAVI14:
- chip_name = "navi14";
- break;
case CHIP_NAVI12:
+ if (adev->discovery.bin)
+ return 0;
chip_name = "navi12";
break;
- case CHIP_VANGOGH:
- chip_name = "vangogh";
+ case CHIP_CYAN_SKILLFISH:
+ if (adev->discovery.bin)
+ return 0;
+ chip_name = "cyan_skillfish";
break;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
- err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
- if (err) {
- dev_err(adev->dev,
- "Failed to load gpu_info firmware \"%s\"\n",
- fw_name);
- goto out;
- }
- err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
+ err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_gpu_info.bin", chip_name);
if (err) {
dev_err(adev->dev,
- "Failed to validate gpu_info firmware \"%s\"\n",
- fw_name);
+ "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
+ chip_name);
goto out;
}
@@ -1879,7 +2690,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
/*
- * Should be droped when DAL no longer needs it.
+ * Should be dropped when DAL no longer needs it.
*/
if (adev->asic_type == CHIP_NAVI12)
goto parse_soc_bounding_box;
@@ -1935,6 +2746,24 @@ out:
return err;
}
+static void amdgpu_uid_init(struct amdgpu_device *adev)
+{
+ /* Initialize the UID for the device */
+ adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL);
+ if (!adev->uid_info) {
+ dev_warn(adev->dev, "Failed to allocate memory for UID\n");
+ return;
+ }
+ adev->uid_info->adev = adev;
+}
+
+static void amdgpu_uid_fini(struct amdgpu_device *adev)
+{
+ /* Free the UID memory */
+ kfree(adev->uid_info);
+ adev->uid_info = NULL;
+}
+
/**
* amdgpu_device_ip_early_init - run early init for hardware IPs
*
@@ -1947,6 +2776,10 @@ out:
*/
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
+ struct pci_dev *parent;
+ bool total, skip_bios;
+ uint32_t bios_flags;
int i, r;
amdgpu_device_enable_virtual_display(adev);
@@ -1955,6 +2788,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return r;
+
+ r = amdgpu_virt_init_critical_region(adev);
+ if (r)
+ return r;
}
switch (adev->asic_type) {
@@ -2004,68 +2841,69 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- if (adev->flags & AMD_IS_APU)
- adev->family = AMDGPU_FAMILY_RV;
- else
- adev->family = AMDGPU_FAMILY_AI;
-
- r = soc15_set_ip_blocks(adev);
+ default:
+ r = amdgpu_discovery_set_ip_blocks(adev);
if (r)
return r;
break;
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_VANGOGH:
- if (adev->asic_type == CHIP_VANGOGH)
- adev->family = AMDGPU_FAMILY_VGH;
- else
- adev->family = AMDGPU_FAMILY_NV;
+ }
- r = nv_set_ip_blocks(adev);
- if (r)
- return r;
- break;
- default:
- /* FIXME: not supported yet */
- return -EINVAL;
+ /* Check for IP version 9.4.3 with A0 hardware */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
+ !amdgpu_device_get_rev_id(adev)) {
+ dev_err(adev->dev, "Unsupported A0 hardware\n");
+ return -ENODEV; /* device unsupported - no device error */
}
- amdgpu_amdkfd_device_probe(adev);
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ ((adev->flags & AMD_IS_APU) == 0) &&
+ !dev_is_removable(&adev->pdev->dev))
+ adev->flags |= AMD_IS_PX;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ parent = pcie_find_root_port(adev->pdev);
+ adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
+ }
adev->pm.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
+ adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
+ if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
+ adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
+ adev->virt.is_xgmi_node_migrate_enabled = false;
+ if (amdgpu_sriov_vf(adev)) {
+ adev->virt.is_xgmi_node_migrate_enabled =
+ amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
+ }
+
+ total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
+ ip_block = &adev->ip_blocks[i];
+
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
- DRM_ERROR("disabled ip block: %d <%s>\n",
- i, adev->ip_blocks[i].version->funcs->name);
+ dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
+ adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.valid = false;
- } else {
- if (adev->ip_blocks[i].version->funcs->early_init) {
- r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
- if (r == -ENOENT) {
- adev->ip_blocks[i].status.valid = false;
- } else if (r) {
- DRM_ERROR("early_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- } else {
- adev->ip_blocks[i].status.valid = true;
- }
+ } else if (ip_block->version->funcs->early_init) {
+ r = ip_block->version->funcs->early_init(ip_block);
+ if (r == -ENOENT) {
+ adev->ip_blocks[i].status.valid = false;
+ } else if (r) {
+ dev_err(adev->dev,
+ "early_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ total = false;
} else {
adev->ip_blocks[i].status.valid = true;
}
+ } else {
+ adev->ip_blocks[i].status.valid = true;
}
/* get the vbios after the asic_funcs are set up */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
@@ -2073,18 +2911,51 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
+ bios_flags = amdgpu_device_get_vbios_flags(adev);
+ skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
/* Read BIOS */
- if (!amdgpu_get_bios(adev))
- return -EINVAL;
-
- r = amdgpu_atombios_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_init failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
- return r;
+ if (!skip_bios) {
+ bool optional =
+ !!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
+ if (!amdgpu_get_bios(adev) && !optional)
+ return -EINVAL;
+
+ if (optional && !adev->bios)
+ dev_info(
+ adev->dev,
+ "VBIOS image optional, proceeding without VBIOS image");
+
+ if (adev->bios) {
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(
+ adev,
+ AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
+ 0, 0);
+ return r;
+ }
+ }
}
+
+ /*get pf2vf msg info at it's earliest time*/
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_init_data_exchange(adev);
+
}
}
+ if (!total)
+ return -ENODEV;
+
+ if (adev->gmc.xgmi.supported)
+ amdgpu_xgmi_early_init(adev);
+
+ if (amdgpu_is_multi_aid(adev))
+ amdgpu_uid_init(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (ip_block->status.valid != false)
+ amdgpu_amdkfd_device_probe(adev);
adev->cg_flags &= amdgpu_cg_mask;
adev->pg_flags &= amdgpu_pg_mask;
@@ -2101,13 +2972,18 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
adev->ip_blocks[i].status.hw = true;
@@ -2126,10 +3002,14 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.hw = true;
@@ -2149,27 +3029,33 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
continue;
+ if (!amdgpu_ip_member_of_hwini(adev,
+ AMD_IP_BLOCK_TYPE_PSP))
+ break;
+
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+
/* no need to do the fw loading again if already done*/
if (adev->ip_blocks[i].status.hw == true)
break;
if (amdgpu_in_reset(adev) || adev->in_suspend) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
} else {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i]
+ .version->funcs->name,
+ r);
return r;
}
+ adev->ip_blocks[i].status.hw = true;
}
-
- adev->ip_blocks[i].status.hw = true;
break;
}
}
@@ -2180,6 +3066,74 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
return r;
}
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+ struct drm_sched_init_args args = {
+ .ops = &amdgpu_sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .timeout_wq = adev->reset_domain->wq,
+ .dev = adev->dev,
+ };
+ long timeout;
+ int r, i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ /* No need to setup the GPU scheduler for rings that don't need it */
+ if (!ring || ring->no_scheduler)
+ continue;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ timeout = adev->compute_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
+
+ args.timeout = timeout;
+ args.credit_limit = ring->num_hw_submission;
+ args.score = ring->sched_score;
+ args.name = ring->name;
+
+ r = drm_sched_init(&ring->sched, &args);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_uvd_entity_init(adev, ring);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create UVD scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_vce_entity_init(adev, ring);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create VCE scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ }
+
+ if (adev->xcp_mgr)
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ return 0;
+}
+
+
/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@@ -2193,6 +3147,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
*/
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
+ bool init_badpage;
int i, r;
r = amdgpu_ras_init(adev);
@@ -2202,43 +3157,77 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
- r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
- if (r) {
- DRM_ERROR("sw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- goto init_failed;
+ if (adev->ip_blocks[i].version->funcs->sw_init) {
+ r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "sw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ goto init_failed;
+ }
}
adev->ip_blocks[i].status.sw = true;
- /* need to do gmc hw init early so we can allocate gpu mem */
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
- r = amdgpu_device_vram_scratch_init(adev);
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ /* need to do common hw init early so everything is set up for gmc */
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev, "hw_init %d failed %d\n", i,
+ r);
+ goto init_failed;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ /* need to do gmc hw init early so we can allocate gpu mem */
+ /* Try to reserve bad pages early */
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_exchange_data(adev);
+
+ r = amdgpu_device_mem_scratch_init(adev);
if (r) {
- DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
+ dev_err(adev->dev,
+ "amdgpu_mem_scratch_init failed %d\n",
+ r);
goto init_failed;
}
- r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init %d failed %d\n", i, r);
+ dev_err(adev->dev, "hw_init %d failed %d\n", i,
+ r);
goto init_failed;
}
r = amdgpu_device_wb_init(adev);
if (r) {
- DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
+ dev_err(adev->dev,
+ "amdgpu_device_wb_init failed %d\n", r);
goto init_failed;
}
adev->ip_blocks[i].status.hw = true;
/* right after GMC hw init, we create CSA */
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+ if (adev->gfx.mcbp) {
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_CSA_SIZE);
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_CSA_SIZE);
if (r) {
- DRM_ERROR("allocate CSA failed %d\n", r);
+ dev_err(adev->dev,
+ "allocate CSA failed %d\n", r);
goto init_failed;
}
}
+
+ r = amdgpu_seq64_init(adev);
+ if (r) {
+ dev_err(adev->dev, "allocate seq64 failed %d\n",
+ r);
+ goto init_failed;
+ }
}
}
@@ -2283,19 +3272,58 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
* Note: theoretically, this should be called before all vram allocations
* to protect retired page from abusing
*/
- r = amdgpu_ras_recovery_init(adev);
+ init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ r = amdgpu_ras_recovery_init(adev, init_badpage);
if (r)
goto init_failed;
- if (adev->gmc.xgmi.num_physical_nodes > 1)
- amdgpu_xgmi_add_device(adev);
- amdgpu_amdkfd_device_init(adev);
+ /**
+ * In case of XGMI grab extra reference for reset domain for this device
+ */
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (amdgpu_xgmi_add_device(adev) == 0) {
+ if (!amdgpu_sriov_vf(adev)) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+ if (WARN_ON(!hive)) {
+ r = -ENOENT;
+ goto init_failed;
+ }
+
+ if (!hive->reset_domain ||
+ !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
+ r = -ENOENT;
+ amdgpu_put_xgmi_hive(hive);
+ goto init_failed;
+ }
+
+ /* Drop the early temporary reset domain we created for device */
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = hive->reset_domain;
+ amdgpu_put_xgmi_hive(hive);
+ }
+ }
+ }
+
+ r = amdgpu_device_init_schedulers(adev);
+ if (r)
+ goto init_failed;
+
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
+ /* Don't init kfd if whole hive need to be reset during init */
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ kgd2kfd_init_zone_device(adev);
+ amdgpu_amdkfd_device_init(adev);
+ }
amdgpu_fru_get_product_info(adev);
+ if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
+ r = amdgpu_cper_init(adev);
+
init_failed:
- if (amdgpu_sriov_vf(adev))
- amdgpu_virt_release_full_gpu(adev, true);
return r;
}
@@ -2307,7 +3335,7 @@ init_failed:
*
* Writes a reset magic value to the gart pointer in VRAM. The driver calls
* this function before a GPU reset. If the value is retained after a
- * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
*/
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
@@ -2338,6 +3366,8 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
* always assumed to be lost.
*/
switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_LEGACY:
+ case AMD_RESET_METHOD_LINK:
case AMD_RESET_METHOD_BACO:
case AMD_RESET_METHOD_MODE1:
return true;
@@ -2359,8 +3389,8 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
* Returns 0 on success, negative error code on failure.
*/
-static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
- enum amd_clockgating_state state)
+int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
{
int i, j, r;
@@ -2371,6 +3401,11 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
+ /* skip CG for GFX, SDMA on S0ix */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
@@ -2378,11 +3413,13 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
state);
if (r) {
- DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_clockgating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
@@ -2391,7 +3428,8 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
return 0;
}
-static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
+int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
+ enum amd_powergating_state state)
{
int i, j, r;
@@ -2402,18 +3440,26 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
- /* skip CG for VCE/UVD, it's handled specially */
+ /* skip PG for GFX, SDMA on S0ix */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ /* skip CG for VCE/UVD/VPE, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VPE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
- r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
state);
if (r) {
- DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_powergating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
@@ -2440,7 +3486,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
for (i = 0; i < mgpu_info.num_dgpu; i++) {
gpu_ins = &(mgpu_info.gpu_ins[i]);
adev = gpu_ins->adev;
- if (!(adev->flags & AMD_IS_APU) &&
+ if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
!gpu_ins->mgpu_fan_enabled) {
ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
if (ret)
@@ -2477,17 +3523,26 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->funcs->late_init) {
- r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("late_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
adev->ip_blocks[i].status.late_initialized = true;
}
- amdgpu_ras_set_error_query_ready(adev, true);
+ r = amdgpu_ras_late_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
+ return r;
+ }
+
+ if (!amdgpu_reset_in_recovery(adev))
+ amdgpu_ras_set_error_query_ready(adev, true);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
@@ -2496,8 +3551,13 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
- DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+ dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
+ /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
+ if (amdgpu_passthrough(adev) &&
+ ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
+ adev->asic_type == CHIP_ALDEBARAN))
+ amdgpu_dpm_handle_passthrough_sbr(adev, true);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
mutex_lock(&mgpu_info.mutex);
@@ -2524,7 +3584,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
AMDGPU_XGMI_PSTATE_MIN);
if (r) {
- DRM_ERROR("pstate setting failed (%d).\n", r);
+ dev_err(adev->dev,
+ "pstate setting failed (%d).\n",
+ r);
break;
}
}
@@ -2536,64 +3598,130 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
return 0;
}
+static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ if (!ip_block->version->funcs->hw_fini) {
+ dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
+ ip_block->version->funcs->name);
+ } else {
+ r = ip_block->version->funcs->hw_fini(ip_block);
+ /* XXX handle errors */
+ if (r) {
+ dev_dbg(adev->dev,
+ "hw_fini of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ }
+ }
+
+ ip_block->status.hw = false;
+}
+
/**
- * amdgpu_device_ip_fini - run fini for hardware IPs
+ * amdgpu_device_smu_fini_early - smu hw_fini wrapper
*
* @adev: amdgpu_device pointer
*
- * Main teardown pass for hardware IPs. The list of all the hardware
- * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
- * are run. hw_fini tears down the hardware associated with each IP
- * and sw_fini tears down any software state associated with each IP.
- * Returns 0 on success, negative error code on failure.
+ * For ASICs need to disable SMC first
*/
-static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
{
- int i, r;
-
- if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
- amdgpu_virt_release_ras_err_handler_data(adev);
-
- amdgpu_ras_pre_fini(adev);
-
- if (adev->gmc.xgmi.num_physical_nodes > 1)
- amdgpu_xgmi_remove_device(adev);
-
- amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
- amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+ int i;
- amdgpu_amdkfd_device_fini(adev);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
+ return;
- /* need to disable SMC first */
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
- r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
- adev->ip_blocks[i].status.hw = false;
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
break;
}
}
+}
- for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
- if (!adev->ip_blocks[i].status.hw)
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].version->funcs->early_fini)
continue;
- r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
- /* XXX handle errors */
+ r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
if (r) {
- DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_dbg(adev->dev,
+ "early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
+ }
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ amdgpu_amdkfd_suspend(adev, true);
+ amdgpu_userq_suspend(adev);
+
+ /* Workaround for ASICs need to disable SMC first */
+ amdgpu_device_smu_fini_early(adev);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
- adev->ip_blocks[i].status.hw = false;
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
}
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_release_full_gpu(adev, false))
+ dev_err(adev->dev,
+ "failed to release exclusive mode on fini\n");
+ }
+
+ /*
+ * Driver reload on the APU can fail due to firmware validation because
+ * the PSP is always running, as it is shared across the whole SoC.
+ * This same issue does not occur on dGPU because it has a mechanism
+ * that checks whether the PSP is running. A solution for those issues
+ * in the APU is to trigger a GPU reset, but this should be done during
+ * the unload phase to avoid adding boot latency and screen flicker.
+ */
+ if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
+ r = amdgpu_asic_reset(adev);
+ if (r)
+ dev_err(adev->dev, "asic reset on %s failed\n", __func__);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run. hw_fini tears down the hardware associated with each IP
+ * and sw_fini tears down any software state associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ amdgpu_cper_fini(adev);
+
+ if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
+ amdgpu_virt_release_ras_err_handler_data(adev);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ amdgpu_xgmi_remove_device(adev);
+
+ amdgpu_amdkfd_device_fini_sw(adev);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.sw)
@@ -2603,15 +3731,20 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_ucode_free_bo(adev);
amdgpu_free_static_csa(&adev->virt.csa_obj);
amdgpu_device_wb_fini(adev);
- amdgpu_device_vram_scratch_fini(adev);
+ amdgpu_device_mem_scratch_fini(adev);
amdgpu_ib_pool_fini(adev);
+ amdgpu_seq64_fini(adev);
+ amdgpu_doorbell_fini(adev);
}
-
- r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ if (adev->ip_blocks[i].version->funcs->sw_fini) {
+ r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
+ /* XXX handle errors */
+ if (r) {
+ dev_dbg(adev->dev,
+ "sw_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ }
}
adev->ip_blocks[i].status.sw = false;
adev->ip_blocks[i].status.valid = false;
@@ -2621,15 +3754,12 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.late_initialized)
continue;
if (adev->ip_blocks[i].version->funcs->late_fini)
- adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
+ adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
adev->ip_blocks[i].status.late_initialized = false;
}
amdgpu_ras_fini(adev);
-
- if (amdgpu_sriov_vf(adev))
- if (amdgpu_virt_release_full_gpu(adev, false))
- DRM_ERROR("failed to release exclusive mode on fini\n");
+ amdgpu_uid_fini(adev);
return 0;
}
@@ -2647,7 +3777,7 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
+ dev_err(adev->dev, "ib ring test failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
@@ -2655,12 +3785,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
- mutex_lock(&adev->gfx.gfx_off_mutex);
- if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
- adev->gfx.gfx_off_state = true;
- }
- mutex_unlock(&adev->gfx.gfx_off_mutex);
+ WARN_ON_ONCE(adev->gfx.gfx_off_state);
+ WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
+ adev->gfx.gfx_off_state = true;
}
/**
@@ -2676,13 +3805,18 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
*/
static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
- int i, r;
+ int i, r, rec;
- if (adev->in_poweroff_reboot_com ||
- !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
- amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
- amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
- }
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ /*
+ * Per PMFW team's suggestion, driver needs to handle gfxoff
+ * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
+ * scenario. Add the missing df cstate disablement here.
+ */
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
@@ -2692,19 +3826,25 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
continue;
- /* XXX handle errors */
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
- /* XXX handle errors */
- if (r) {
- DRM_ERROR("suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
-
- adev->ip_blocks[i].status.hw = false;
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ goto unwind;
}
return 0;
+unwind:
+ rec = amdgpu_device_ip_resume_phase3(adev);
+ if (rec)
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
+ rec);
+
+ amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+
+ return r;
}
/**
@@ -2720,7 +3860,10 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
*/
static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
- int i, r;
+ int i, r, rec;
+
+ if (adev->in_s0ix)
+ amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
@@ -2734,29 +3877,96 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false;
continue;
}
- /* XXX handle errors */
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
- /* XXX handle errors */
- if (r) {
- DRM_ERROR("suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
- adev->ip_blocks[i].status.hw = false;
+
+ /* skip unnecessary suspend if we do not initialize them yet */
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+
+ /* Since we skip suspend for S0i3, we need to cancel the delayed
+ * idle work here as the suspend callback never gets called.
+ */
+ if (adev->in_s0ix &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+ /* skip suspend of gfx/mes and psp for S0ix
+ * gfx is in gfxoff state, so on resume it will exit gfxoff just
+ * like at runtime. PSP is also part of the always on hardware
+ * so no need to suspend it.
+ */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
+ continue;
+
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (adev->in_s0ix &&
+ (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0)) &&
+ (adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
+ * These are in TMR, hence are expected to be reused by PSP-TOS to reload
+ * from this location and RLC Autoload automatically also gets loaded
+ * from here based on PMFW -> PSP message during re-init sequence.
+ * Therefore, the psp suspend & resume should be skipped to avoid destroy
+ * the TMR and reload FWs again for IMU enabled APU ASICs.
+ */
+ if (amdgpu_in_reset(adev) &&
+ (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ goto unwind;
+
/* handle putting the SMC in the appropriate state */
- if(!amdgpu_sriov_vf(adev)){
+ if (!amdgpu_sriov_vf(adev)) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
if (r) {
- DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
- adev->mp1_state, r);
- return r;
+ dev_err(adev->dev,
+ "SMC failed to set mp1 state %d, %d\n",
+ adev->mp1_state, r);
+ goto unwind;
}
}
}
- adev->ip_blocks[i].status.hw = false;
}
return 0;
+unwind:
+ /* suspend phase 2 = resume phase 1 + resume phase 2 */
+ rec = amdgpu_device_ip_resume_phase1(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ rec = amdgpu_device_fw_loading(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_fw_loading failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ rec = amdgpu_device_ip_resume_phase2(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ return r;
}
/**
@@ -2770,12 +3980,16 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
* in each IP into a state suitable for suspend.
* Returns 0 on success, negative error code on failure.
*/
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
+static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
int r;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_fini_data_exchange(adev);
amdgpu_virt_request_full_gpu(adev, false);
+ }
+
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
r = amdgpu_device_ip_suspend_phase1(adev);
if (r)
@@ -2793,13 +4007,13 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
int i, r;
static enum amd_ip_block_type ip_order[] = {
- AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_COMMON,
+ AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_PSP,
AMD_IP_BLOCK_TYPE_IH,
};
- for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+ for (i = 0; i < adev->num_ip_blocks; i++) {
int j;
struct amdgpu_ip_block *block;
@@ -2812,10 +4026,12 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
!block->status.valid)
continue;
- r = block->version->funcs->hw_init(adev);
- DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
+ r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-early: %s failed\n",
+ block->version->funcs->name);
return r;
+ }
block->status.hw = true;
}
}
@@ -2825,43 +4041,44 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
{
- int i, r;
+ struct amdgpu_ip_block *block;
+ int i, r = 0;
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_SMC,
AMD_IP_BLOCK_TYPE_DCE,
AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA,
+ AMD_IP_BLOCK_TYPE_MES,
AMD_IP_BLOCK_TYPE_UVD,
AMD_IP_BLOCK_TYPE_VCE,
- AMD_IP_BLOCK_TYPE_VCN
+ AMD_IP_BLOCK_TYPE_VCN,
+ AMD_IP_BLOCK_TYPE_JPEG
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
- int j;
- struct amdgpu_ip_block *block;
+ block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
- for (j = 0; j < adev->num_ip_blocks; j++) {
- block = &adev->ip_blocks[j];
-
- if (block->version->type != ip_order[i] ||
- !block->status.valid ||
- block->status.hw)
- continue;
+ if (!block)
+ continue;
- if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
- r = block->version->funcs->resume(adev);
- else
- r = block->version->funcs->hw_init(adev);
+ if (block->status.valid && !block->status.hw) {
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = amdgpu_ip_block_resume(block);
+ } else {
+ r = block->version->funcs->hw_init(block);
+ }
- DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
- return r;
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-late: %s failed\n",
+ block->version->funcs->name);
+ break;
+ }
block->status.hw = true;
}
}
- return 0;
+ return r;
}
/**
@@ -2885,15 +4102,12 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
- adev->ip_blocks[i].status.hw = true;
}
}
@@ -2905,7 +4119,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
- * First resume function for hardware IPs. The list of all the hardware
+ * Second resume function for hardware IPs. The list of all the hardware
* IPs that make up the asic is walked and the resume callbacks are run for
* all blocks except COMMON, GMC, and IH. resume puts the hardware into a
* functional state after a suspend and updates the software state as
@@ -2923,15 +4137,42 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Third resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all DCE. resume puts the hardware into a functional state after a suspend
+ * and updates the software state as necessary. This function is also used
+ * for restoring the GPU after a GPU reset.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
}
- adev->ip_blocks[i].status.hw = true;
}
return 0;
@@ -2944,7 +4185,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
*
* Main resume function for hardware IPs. The hardware IPs
* are split into two resume functions because they are
- * are also used in in recovering from a GPU reset and some additional
+ * also used in recovering from a GPU reset and some additional
* steps need to be take between them. In this case (S3/S4) they are
* run sequentially.
* Returns 0 on success, negative error code on failure.
@@ -2963,6 +4204,16 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
r = amdgpu_device_ip_resume_phase2(adev);
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
+ if (r)
+ return r;
+
+ amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_resume_phase3(adev);
+
return r;
}
@@ -2977,7 +4228,7 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
if (adev->is_atom_fw) {
- if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
+ if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
} else {
if (amdgpu_atombios_has_gpu_virtualization_table(adev))
@@ -2992,63 +4243,49 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
/**
* amdgpu_device_asic_has_dc_support - determine if DC supports the asic
*
+ * @pdev : pci device context
* @asic_type: AMD asic type
*
* Check if there is DC (new modesetting infrastructre) support for an asic.
* returns true if DC has support, false if not.
*/
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
+ enum amd_asic_type asic_type)
{
switch (asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ case CHIP_TOPAZ:
+ /* chips with no display hardware */
+ return false;
#if defined(CONFIG_DRM_AMD_DC)
-#if defined(CONFIG_DRM_AMD_DC_SI)
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
-#endif
- case CHIP_BONAIRE:
+ return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
/*
* We have systems in the wild with these ASICs that require
- * LVDS and VGA support which is not supported with DC.
+ * TRAVIS and NUTMEG support which is not supported with DC.
*
* Fallback to the non-DC driver here by default so as not to
* cause regressions.
*/
return amdgpu_dc > 0;
- case CHIP_HAWAII:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case CHIP_RAVEN:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_RENOIR:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_VANGOGH:
-#endif
+ default:
return amdgpu_dc != 0;
-#endif
+#else
default:
if (amdgpu_dc > 0)
- DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
- "but isn't supported by ASIC, ignoring\n");
+ dev_info_once(
+ &pdev->dev,
+ "Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
return false;
+#endif
}
}
@@ -3061,13 +4298,13 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
*/
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
+ if (adev->enable_virtual_display ||
+ (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
return false;
- return amdgpu_device_asic_has_dc_support(adev->asic_type);
+ return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
}
-
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
struct amdgpu_device *adev =
@@ -3087,19 +4324,18 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
task_barrier_enter(&hive->tb);
- adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
+ adev->asic_reset_res = amdgpu_device_baco_enter(adev);
if (adev->asic_reset_res)
goto fail;
task_barrier_exit(&hive->tb);
- adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
+ adev->asic_reset_res = amdgpu_device_baco_exit(adev);
if (adev->asic_reset_res)
goto fail;
- if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
- adev->mmhub.funcs->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
} else {
task_barrier_full(&hive->tb);
@@ -3108,7 +4344,8 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
fail:
if (adev->asic_reset_res)
- DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
+ dev_warn(adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
adev->asic_reset_res, adev_to_drm(adev)->unique);
amdgpu_put_xgmi_hive(hive);
}
@@ -3121,77 +4358,155 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
long timeout;
int ret = 0;
- /*
- * By default timeout for non compute jobs is 10000.
- * And there is no timeout enforced on compute jobs.
- * In SR-IOV or passthrough mode, timeout for compute
- * jobs are 60000 by default.
- */
- adev->gfx_timeout = msecs_to_jiffies(10000);
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- if (amdgpu_sriov_vf(adev))
- adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
- msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
- else if (amdgpu_passthrough(adev))
- adev->compute_timeout = msecs_to_jiffies(60000);
- else
- adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+ /* By default timeout for all queues is 2 sec */
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = msecs_to_jiffies(2000);
- if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
- while ((timeout_setting = strsep(&input, ",")) &&
- strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
- ret = kstrtol(timeout_setting, 0, &timeout);
- if (ret)
- return ret;
+ if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
+ return 0;
- if (timeout == 0) {
- index++;
- continue;
- } else if (timeout < 0) {
- timeout = MAX_SCHEDULE_TIMEOUT;
- } else {
- timeout = msecs_to_jiffies(timeout);
- }
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
- switch (index++) {
- case 0:
- adev->gfx_timeout = timeout;
- break;
- case 1:
- adev->compute_timeout = timeout;
- break;
- case 2:
- adev->sdma_timeout = timeout;
- break;
- case 3:
- adev->video_timeout = timeout;
- break;
- default:
- break;
- }
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ dev_warn(adev->dev, "lockup timeout disabled");
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+ } else {
+ timeout = msecs_to_jiffies(timeout);
}
- /*
- * There is only one value specified and
- * it should apply to all non-compute jobs.
- */
- if (index == 1) {
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
- adev->compute_timeout = adev->gfx_timeout;
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
}
}
+ /* When only one value specified apply it to all queues. */
+ if (index == 1)
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = timeout;
+
return ret;
}
-static const struct attribute *amdgpu_dev_attributes[] = {
- &dev_attr_product_name.attr,
- &dev_attr_product_number.attr,
- &dev_attr_serial_number.attr,
- &dev_attr_pcie_replay_count.attr,
- NULL
-};
+/**
+ * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
+ */
+static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
+{
+ struct iommu_domain *domain;
+ domain = iommu_get_domain_for_dev(adev->dev);
+ if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
+ adev->ram_is_direct_mapped = true;
+}
+
+#if defined(CONFIG_HSA_AMD_P2P)
+/**
+ * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * return if IOMMU remapping bar address
+ */
+static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
+{
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev(adev->dev);
+ if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
+ domain->type == IOMMU_DOMAIN_DMA_FQ))
+ return true;
+
+ return false;
+}
+#endif
+
+static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
+{
+ if (amdgpu_mcbp == 1)
+ adev->gfx.mcbp = true;
+ else if (amdgpu_mcbp == 0)
+ adev->gfx.mcbp = false;
+
+ if (amdgpu_sriov_vf(adev))
+ adev->gfx.mcbp = true;
+
+ if (adev->gfx.mcbp)
+ dev_info(adev->dev, "MCBP is enabled\n");
+}
+
+static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_atombios_sysfs_init(adev);
+ if (r)
+ drm_err(&adev->ddev,
+ "registering atombios sysfs failed (%d).\n", r);
+
+ r = amdgpu_pm_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
+
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r) {
+ adev->ucode_sysfs_en = false;
+ dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
+
+ r = amdgpu_device_attr_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "Could not create amdgpu device attr\n");
+
+ r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
+ if (r)
+ dev_err(adev->dev,
+ "Could not create amdgpu board attributes\n");
+
+ amdgpu_fru_sysfs_init(adev);
+ amdgpu_reg_state_sysfs_init(adev);
+ amdgpu_xcp_sysfs_init(adev);
+
+ return r;
+}
+
+static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
+{
+ if (adev->pm.sysfs_initialized)
+ amdgpu_pm_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
+ amdgpu_device_attr_sysfs_fini(adev);
+ amdgpu_fru_sysfs_fini(adev);
+
+ amdgpu_reg_state_sysfs_fini(adev);
+ amdgpu_xcp_sysfs_fini(adev);
+}
/**
* amdgpu_device_init - initialize the driver
@@ -3206,11 +4521,11 @@ static const struct attribute *amdgpu_dev_attributes[] = {
int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags)
{
- struct drm_device *ddev = adev_to_drm(adev);
struct pci_dev *pdev = adev->pdev;
int r, i;
- bool atpx = false;
+ bool px = false;
u32 max_MBps;
+ int tmp;
adev->shutdown = false;
adev->flags = flags;
@@ -3226,11 +4541,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gmc.gart_size = 512 * 1024 * 1024;
adev->accel_working = false;
adev->num_rings = 0;
+ RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
adev->vm_manager.vm_pte_num_scheds = 0;
adev->gmc.gmc_funcs = NULL;
+ adev->harvest_ip_mask = 0x0;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -3238,10 +4555,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->smc_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg = &amdgpu_invalid_rreg;
adev->pcie_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
adev->pciep_rreg = &amdgpu_invalid_rreg;
adev->pciep_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
+ adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
adev->didt_rreg = &amdgpu_invalid_rreg;
@@ -3251,27 +4572,43 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
- DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
- amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
- pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
+ dev_info(
+ adev->dev,
+ "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
+ amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
/* mutex initialization are all done here so we
- * can recall function without having locking issues */
- atomic_set(&adev->irq.ih.lock, 0);
+ * can recall function without having locking issues
+ */
mutex_init(&adev->firmware.mutex);
mutex_init(&adev->pm.mutex);
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex);
mutex_init(&adev->gfx.gfx_off_mutex);
+ mutex_init(&adev->gfx.partition_mutex);
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
- atomic_set(&adev->in_gpu_reset, 0);
- init_rwsem(&adev->reset_sem);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
+ mutex_init(&adev->pm.stable_pstate_ctx_lock);
+ mutex_init(&adev->benchmark_mutex);
+ mutex_init(&adev->gfx.reset_sem_mutex);
+ /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
+ mutex_init(&adev->enforce_isolation_mutex);
+ for (i = 0; i < MAX_XCP; ++i) {
+ adev->isolation[i].spearhead = dma_fence_get_stub();
+ amdgpu_sync_create(&adev->isolation[i].active);
+ amdgpu_sync_create(&adev->isolation[i].prev);
+ }
+ mutex_init(&adev->gfx.userq_sch_mutex);
+ mutex_init(&adev->gfx.workload_profile_mutex);
+ mutex_init(&adev->vcn.workload_profile_mutex);
+
+ amdgpu_device_init_apu_flags(adev);
r = amdgpu_device_check_arguments(adev);
if (r)
@@ -3286,18 +4623,45 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->se_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->virt.rlcg_reg_lock);
+ spin_lock_init(&adev->wb.lock);
+
+ xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
+
+ INIT_LIST_HEAD(&adev->reset_list);
+
+ INIT_LIST_HEAD(&adev->ras_list);
- INIT_LIST_HEAD(&adev->shadow_list);
- mutex_init(&adev->shadow_list_lock);
+ INIT_LIST_HEAD(&adev->pm.od_kobj_list);
+
+ xa_init(&adev->userq_doorbell_xa);
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
+ /*
+ * Initialize the enforce_isolation work structures for each XCP
+ * partition. This work handler is responsible for enforcing shader
+ * isolation on AMD GPUs. It counts the number of emitted fences for
+ * each GFX and compute ring. If there are any fences, it schedules
+ * the `enforce_isolation_work` to be run after a delay. If there are
+ * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
+ * runqueue.
+ */
+ for (i = 0; i < MAX_XCP; i++) {
+ INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
+ amdgpu_gfx_enforce_isolation_handler);
+ adev->gfx.enforce_isolation[i].adev = adev;
+ adev->gfx.enforce_isolation[i].xcp_id = i;
+ }
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+ INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
adev->gfx.gfx_off_req_count = 1;
+ adev->gfx.gfx_off_residency = 0;
+ adev->gfx.gfx_off_entrycount = 0;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;
atomic_set(&adev->throttling_logging_enabled, 1);
@@ -3309,6 +4673,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* for throttling interrupt) = 60 seconds.
*/
ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
+
ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
/* Registers mapping */
@@ -3321,76 +4686,106 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->rmmio_size = pci_resource_len(adev->pdev, 2);
}
+ for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
+ atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
+
adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
- if (adev->rmmio == NULL) {
+ if (!adev->rmmio)
return -ENOMEM;
- }
- DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
- DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
-
- /* io port mapping */
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
- adev->rio_mem_size = pci_resource_len(adev->pdev, i);
- adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
- break;
- }
- }
- if (adev->rio_mem == NULL)
- DRM_INFO("PCI I/O BAR is not found.\n");
-
- /* enable PCIE atomic ops */
- r = pci_enable_atomic_ops_to_root(adev->pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64);
- if (r) {
- adev->have_atomics_support = false;
- DRM_INFO("PCIE atomic ops is not supported\n");
- } else {
- adev->have_atomics_support = true;
- }
- amdgpu_device_get_pcie_info(adev);
-
- if (amdgpu_mcbp)
- DRM_INFO("MCBP is enabled\n");
+ dev_info(adev->dev, "register mmio base: 0x%08X\n",
+ (uint32_t)adev->rmmio_base);
+ dev_info(adev->dev, "register mmio size: %u\n",
+ (unsigned int)adev->rmmio_size);
- if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
- adev->enable_mes = true;
+ /*
+ * Reset domain needs to be present early, before XGMI hive discovered
+ * (if any) and initialized to use reset sem and in_gpu reset flag
+ * early on during init and before calling to RREG32.
+ */
+ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
+ if (!adev->reset_domain)
+ return -ENOMEM;
/* detect hw virtualization here */
- amdgpu_detect_virtualization(adev);
+ amdgpu_virt_init(adev);
+
+ amdgpu_device_get_pcie_info(adev);
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
- goto failed_unmap;
+ return r;
}
+ amdgpu_device_set_mcbp(adev);
+
+ /*
+ * By default, use default mode where all blocks are expected to be
+ * initialized. At present a 'swinit' of blocks is required to be
+ * completed before the need for a different level is detected.
+ */
+ amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
- goto failed_unmap;
+ return r;
- /* doorbell bar mapping and doorbell index init*/
- amdgpu_device_doorbell_init(adev);
+ /*
+ * No need to remove conflicting FBs for non-display class devices.
+ * This prevents the sysfb from being freed accidently.
+ */
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ /* Get rid of things like offb */
+ r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
+ if (r)
+ return r;
+ }
- /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
- /* this will fail for cards that aren't VGA class devices, just
- * ignore it */
- if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
- vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
+ /* Enable TMZ based on IP_VERSION */
+ amdgpu_gmc_tmz_set(adev);
- if (amdgpu_device_supports_atpx(ddev))
- atpx = true;
- if (amdgpu_has_atpx() &&
- (amdgpu_is_atpx_hybrid() ||
- amdgpu_has_atpx_dgpu_power_cntl()) &&
- !pci_is_thunderbolt_attached(adev->pdev))
- vga_switcheroo_register_client(adev->pdev,
- &amdgpu_switcheroo_ops, atpx);
- if (atpx)
- vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
+ if (amdgpu_sriov_vf(adev) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+ /* VF MMIO access (except mailbox range) from CPU
+ * will be blocked during sriov runtime
+ */
+ adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
+
+ amdgpu_gmc_noretry_set(adev);
+ /* Need to get xgmi info early to decide the reset behavior*/
+ if (adev->gmc.xgmi.supported) {
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+ }
+
+ /* enable PCIE atomic ops */
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->virt.fw_reserve.p_pf2vf)
+ adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+ adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
+ (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
+ * internal path natively support atomics, set have_atomics_support to true.
+ */
+ } else if ((adev->flags & AMD_IS_APU) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >
+ IP_VERSION(9, 0, 0))) {
+ adev->have_atomics_support = true;
+ } else {
+ adev->have_atomics_support =
+ !pci_enable_atomic_ops_to_root(adev->pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ }
+
+ if (!adev->have_atomics_support)
+ dev_info(adev->dev, "PCIE atomic ops is not supported\n");
+
+ /* doorbell bar mapping and doorbell index init*/
+ amdgpu_doorbell_init(adev);
if (amdgpu_emu_mode == 1) {
/* post the asic on emulation mode */
@@ -3398,22 +4793,39 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto fence_driver_init;
}
+ amdgpu_reset_init(adev);
+
/* detect if we are with an SRIOV vbios */
- amdgpu_device_detect_sriov_bios(adev);
+ if (adev->bios)
+ amdgpu_device_detect_sriov_bios(adev);
/* check if we need to reset the asic
* E.g., driver was not cleanly unloaded previously, etc.
*/
if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
- r = amdgpu_asic_reset(adev);
+ if (adev->gmc.xgmi.num_physical_nodes) {
+ dev_info(adev->dev, "Pending hive reset.\n");
+ amdgpu_set_init_level(adev,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev)) {
+ r = psp_gpu_reset(adev);
+ } else {
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ }
+
if (r) {
- dev_err(adev->dev, "asic reset on init failed\n");
- goto failed;
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
}
}
- pci_enable_pcie_error_reporting(adev->pdev);
-
/* Post card if necessary */
if (amdgpu_device_need_post(adev)) {
if (!adev->bios) {
@@ -3421,7 +4833,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = -EINVAL;
goto failed;
}
- DRM_INFO("GPU posting now...\n");
+ dev_info(adev->dev, "GPU posting now...\n");
r = amdgpu_device_asic_init(adev);
if (r) {
dev_err(adev->dev, "gpu post error!\n");
@@ -3429,32 +4841,33 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
- if (adev->is_atom_fw) {
- /* Initialize clocks */
- r = amdgpu_atomfirmware_get_clock_info(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
- goto failed;
- }
- } else {
- /* Initialize clocks */
- r = amdgpu_atombios_get_clock_info(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
- goto failed;
+ if (adev->bios) {
+ if (adev->is_atom_fw) {
+ /* Initialize clocks */
+ r = amdgpu_atomfirmware_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ } else {
+ /* Initialize clocks */
+ r = amdgpu_atombios_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ /* init i2c buses */
+ amdgpu_i2c_init(adev);
}
- /* init i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_atombios_i2c_init(adev);
}
fence_driver_init:
/* Fence driver */
- r = amdgpu_fence_driver_init(adev);
+ r = amdgpu_fence_driver_sw_init(adev);
if (r) {
- dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
+ dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
goto failed;
}
@@ -3464,23 +4877,13 @@ fence_driver_init:
r = amdgpu_device_ip_init(adev);
if (r) {
- /* failed in exclusive mode due to timeout */
- if (amdgpu_sriov_vf(adev) &&
- !amdgpu_sriov_runtime(adev) &&
- amdgpu_virt_mmio_blocked(adev) &&
- !amdgpu_virt_wait_reset(adev)) {
- dev_err(adev->dev, "VF exclusive mode timeout\n");
- /* Don't send request since VF is inactive. */
- adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
- adev->virt.ops = NULL;
- r = -EAGAIN;
- goto failed;
- }
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
- goto failed;
+ goto release_ras_con;
}
+ amdgpu_fence_driver_hw_init(adev);
+
dev_info(adev->dev,
"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
adev->gfx.config.max_shader_engines,
@@ -3500,35 +4903,6 @@ fence_driver_init:
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
- amdgpu_fbdev_init(adev);
-
- r = amdgpu_pm_sysfs_init(adev);
- if (r) {
- adev->pm_sysfs_en = false;
- DRM_ERROR("registering pm debugfs failed (%d).\n", r);
- } else
- adev->pm_sysfs_en = true;
-
- r = amdgpu_ucode_sysfs_init(adev);
- if (r) {
- adev->ucode_sysfs_en = false;
- DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
- } else
- adev->ucode_sysfs_en = true;
-
- if ((amdgpu_testing & 1)) {
- if (adev->accel_working)
- amdgpu_test_moves(adev);
- else
- DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
- }
- if (amdgpu_benchmarking) {
- if (adev->accel_working)
- amdgpu_benchmark(adev, amdgpu_benchmarking);
- else
- DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
- }
-
/*
* Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
* Otherwise the mgpu fan boost feature will be skipped due to the
@@ -3539,25 +4913,32 @@ fence_driver_init:
/* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically.
*/
- r = amdgpu_device_ip_late_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
- goto failed;
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ r = amdgpu_device_ip_late_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
+ goto release_ras_con;
+ }
+ /* must succeed. */
+ amdgpu_ras_resume(adev);
+ queue_delayed_work(system_wq, &adev->delayed_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
}
- /* must succeed. */
- amdgpu_ras_resume(adev);
-
- queue_delayed_work(system_wq, &adev->delayed_init_work,
- msecs_to_jiffies(AMDGPU_RESUME_MS));
-
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_release_full_gpu(adev, true);
flush_delayed_work(&adev->delayed_init_work);
+ }
- r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
- if (r)
- dev_err(adev->dev, "Could not create amdgpu device attr\n");
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ amdgpu_xgmi_reset_on_init(adev);
+ /*
+ * Place those sysfs registering after `late_init`. As some of those
+ * operations performed in `late_init` might affect the sysfs
+ * interfaces creating.
+ */
+ r = amdgpu_device_sys_interface_init(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
r = amdgpu_pmu_init(adev);
@@ -3568,39 +4949,99 @@ fence_driver_init:
if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(pdev);
+ /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
+ /* this will fail for cards that aren't VGA class devices, just
+ * ignore it
+ */
+ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
+
+ px = amdgpu_device_supports_px(adev);
+
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
+ apple_gmux_detect(NULL, NULL)))
+ vga_switcheroo_register_client(adev->pdev,
+ &amdgpu_switcheroo_ops, px);
+
+ if (px)
+ vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
+
+ amdgpu_device_check_iommu_direct_map(adev);
+
+ adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
+ r = register_pm_notifier(&adev->pm_nb);
+ if (r)
+ goto failed;
+
return 0;
+release_ras_con:
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ /* failed in exclusive mode due to timeout */
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_mmio_blocked(adev) &&
+ !amdgpu_virt_wait_reset(adev)) {
+ dev_err(adev->dev, "VF exclusive mode timeout\n");
+ /* Don't send request since VF is inactive. */
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ adev->virt.ops = NULL;
+ r = -EAGAIN;
+ }
+ amdgpu_release_ras_context(adev);
+
failed:
amdgpu_vf_error_trans_all(adev);
- if (atpx)
- vga_switcheroo_fini_domain_pm_ops(adev->dev);
-failed_unmap:
+ return r;
+}
+
+static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
+{
+
+ /* Clear all CPU mappings pointing to this device */
+ unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
+
+ /* Unmap all mapped bars - Doorbell, registers and VRAM */
+ amdgpu_doorbell_fini(adev);
+
iounmap(adev->rmmio);
adev->rmmio = NULL;
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
- return r;
+ /* Memory manager related */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
+ }
}
/**
- * amdgpu_device_fini - tear down the driver
+ * amdgpu_device_fini_hw - tear down the driver
*
* @adev: amdgpu_device pointer
*
* Tear down the driver info (all asics).
* Called at driver shutdown.
*/
-void amdgpu_device_fini(struct amdgpu_device *adev)
+void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
+
+ if (adev->mman.initialized)
+ drain_workqueue(adev->mman.bdev.wq);
adev->shutdown = true;
- kfree(adev->pci_state);
+ unregister_pm_notifier(&adev->pm_nb);
/* make sure IB test finished before entering exclusive mode
* to avoid preemption on IB test
- * */
+ */
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_request_full_gpu(adev, false);
amdgpu_virt_fini_data_exchange(adev);
@@ -3608,150 +5049,363 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
- if (adev->mode_info.mode_config_initialized){
- if (!amdgpu_device_has_dc_support(adev))
+ if (adev->mode_info.mode_config_initialized) {
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
drm_helper_force_disable_all(adev_to_drm(adev));
else
drm_atomic_helper_shutdown(adev_to_drm(adev));
}
- amdgpu_fence_driver_fini(adev);
- if (adev->pm_sysfs_en)
- amdgpu_pm_sysfs_fini(adev);
- amdgpu_fbdev_fini(adev);
+ amdgpu_fence_driver_hw_fini(adev);
+
+ amdgpu_device_sys_interface_fini(adev);
+
+ /* disable ras feature must before hw fini */
+ amdgpu_ras_pre_fini(adev);
+
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
+ amdgpu_device_ip_fini_early(adev);
+
+ amdgpu_irq_fini_hw(adev);
+
+ if (adev->mman.initialized)
+ ttm_device_clear_dma_mappings(&adev->mman.bdev);
+
+ amdgpu_gart_dummy_page_fini(adev);
+
+ if (drm_dev_is_unplugged(adev_to_drm(adev)))
+ amdgpu_device_unmap_mmio(adev);
+
+}
+
+void amdgpu_device_fini_sw(struct amdgpu_device *adev)
+{
+ int i, idx;
+ bool px;
+
amdgpu_device_ip_fini(adev);
- release_firmware(adev->firmware.gpu_info_fw);
- adev->firmware.gpu_info_fw = NULL;
+ amdgpu_fence_driver_sw_fini(adev);
+ amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
adev->accel_working = false;
+ dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+ for (i = 0; i < MAX_XCP; ++i) {
+ dma_fence_put(adev->isolation[i].spearhead);
+ amdgpu_sync_free(&adev->isolation[i].active);
+ amdgpu_sync_free(&adev->isolation[i].prev);
+ }
+
+ amdgpu_reset_fini(adev);
+
/* free i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_i2c_fini(adev);
+ amdgpu_i2c_fini(adev);
- if (amdgpu_emu_mode != 1)
- amdgpu_atombios_fini(adev);
+ if (adev->bios) {
+ if (amdgpu_emu_mode != 1)
+ amdgpu_atombios_fini(adev);
+ amdgpu_bios_release(adev);
+ }
- kfree(adev->bios);
- adev->bios = NULL;
- if (amdgpu_has_atpx() &&
- (amdgpu_is_atpx_hybrid() ||
- amdgpu_has_atpx_dgpu_power_cntl()) &&
- !pci_is_thunderbolt_attached(adev->pdev))
+ kfree(adev->fru_info);
+ adev->fru_info = NULL;
+
+ kfree(adev->xcp_mgr);
+ adev->xcp_mgr = NULL;
+
+ px = amdgpu_device_supports_px(adev);
+
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
+ apple_gmux_detect(NULL, NULL)))
vga_switcheroo_unregister_client(adev->pdev);
- if (amdgpu_device_supports_atpx(adev_to_drm(adev)))
+
+ if (px)
vga_switcheroo_fini_domain_pm_ops(adev->dev);
+
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
- vga_client_register(adev->pdev, NULL, NULL, NULL);
- if (adev->rio_mem)
- pci_iounmap(adev->pdev, adev->rio_mem);
- adev->rio_mem = NULL;
- iounmap(adev->rmmio);
- adev->rmmio = NULL;
- amdgpu_device_doorbell_fini(adev);
+ vga_client_unregister(adev->pdev);
- if (adev->ucode_sysfs_en)
- amdgpu_ucode_sysfs_fini(adev);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ iounmap(adev->rmmio);
+ adev->rmmio = NULL;
+ drm_dev_exit(idx);
+ }
- sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
- if (adev->mman.discovery_bin)
+ if (adev->discovery.bin)
amdgpu_discovery_fini(adev);
+
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = NULL;
+
+ kfree(adev->pci_state);
+ kfree(adev->pcie_reset_ctx.swds_pcistate);
+ kfree(adev->pcie_reset_ctx.swus_pcistate);
}
+/**
+ * amdgpu_device_evict_resources - evict device resources
+ * @adev: amdgpu device object
+ *
+ * Evicts all ttm device resources(vram BOs, gart table) from the lru list
+ * of the vram memory type. Mainly used for evicting device resources
+ * at suspend time.
+ *
+ */
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
+{
+ int ret;
+
+ /* No need to evict vram on APUs unless going to S4 */
+ if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
+ return 0;
+
+ /* No need to evict when going to S5 through S4 callbacks */
+ if (system_state == SYSTEM_POWER_OFF)
+ return 0;
+
+ ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+ if (ret) {
+ dev_warn(adev->dev, "evicting device resources failed\n");
+ return ret;
+ }
+
+ if (adev->in_s4) {
+ ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
+ if (ret)
+ dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
+ }
+ return ret;
+}
/*
* Suspend & resume.
*/
/**
+ * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
+ * @nb: notifier block
+ * @mode: suspend mode
+ * @data: data
+ *
+ * This function is called when the system is about to suspend or hibernate.
+ * It is used to set the appropriate flags so that eviction can be optimized
+ * in the pm prepare callback.
+ */
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ adev->in_s4 = true;
+ break;
+ case PM_POST_HIBERNATION:
+ adev->in_s4 = false;
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_prepare(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i, r;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ /* Evict the majority of BOs before starting suspend sequence */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_complete - complete power state transition
+ *
+ * @dev: drm dev pointer
+ *
+ * Undo the changes from amdgpu_device_prepare. This will be
+ * called on all resume transitions, including those that failed.
+ */
+void amdgpu_device_complete(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->complete)
+ continue;
+ adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
+ }
+}
+
+/**
* amdgpu_device_suspend - initiate device suspend
*
* @dev: drm dev pointer
- * @fbcon : notify the fbdev of suspend
+ * @notify_clients: notify in-kernel DRM clients
*
* Puts the hw in the suspend state (all asics).
* Returns 0 for success or an error on failure.
* Called at driver suspend.
*/
-int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
{
- struct amdgpu_device *adev;
- struct drm_crtc *crtc;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- int r;
-
- adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r, rec;
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
adev->in_suspend = true;
- drm_kms_helper_poll_disable(dev);
- if (fbcon)
- amdgpu_fbdev_set_suspend(adev, 1);
+ if (amdgpu_sriov_vf(adev)) {
+ if (!adev->in_runpm)
+ amdgpu_amdkfd_suspend_process(adev);
+ amdgpu_virt_fini_data_exchange(adev);
+ r = amdgpu_virt_request_full_gpu(adev, false);
+ if (r)
+ return r;
+ }
- cancel_delayed_work_sync(&adev->delayed_init_work);
+ r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
+ if (r)
+ goto unwind_sriov;
- if (!amdgpu_device_has_dc_support(adev)) {
- /* turn off display hw */
- drm_modeset_lock_all(dev);
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter)
- drm_helper_connector_dpms(connector,
- DRM_MODE_DPMS_OFF);
- drm_connector_list_iter_end(&iter);
- drm_modeset_unlock_all(dev);
- /* unpin the front buffers and cursors */
- list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_framebuffer *fb = crtc->primary->fb;
- struct amdgpu_bo *robj;
-
- if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
- struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
- r = amdgpu_bo_reserve(aobj, true);
- if (r == 0) {
- amdgpu_bo_unpin(aobj);
- amdgpu_bo_unreserve(aobj);
- }
- }
+ if (notify_clients)
+ drm_client_dev_suspend(adev_to_drm(adev));
- if (fb == NULL || fb->obj[0] == NULL) {
- continue;
- }
- robj = gem_to_amdgpu_bo(fb->obj[0]);
- /* don't unpin kernel fb objects */
- if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
- r = amdgpu_bo_reserve(robj, true);
- if (r == 0) {
- amdgpu_bo_unpin(robj);
- amdgpu_bo_unreserve(robj);
- }
- }
- }
- }
+ cancel_delayed_work_sync(&adev->delayed_init_work);
amdgpu_ras_suspend(adev);
r = amdgpu_device_ip_suspend_phase1(adev);
+ if (r)
+ goto unwind_smartshift;
+
+ amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ r = amdgpu_userq_suspend(adev);
+ if (r)
+ goto unwind_ip_phase1;
- amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ goto unwind_userq;
- /* evict vram memory */
- amdgpu_bo_evict_vram(adev);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
- amdgpu_fence_driver_suspend(adev);
+ amdgpu_fence_driver_hw_fini(adev);
- if (adev->in_poweroff_reboot_com ||
- !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
- r = amdgpu_device_ip_suspend_phase2(adev);
- else
- amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
- /* evict remaining vram memory
- * This second call to evict vram is to evict the gart page table
- * using the CPU.
+ r = amdgpu_device_ip_suspend_phase2(adev);
+ if (r)
+ goto unwind_evict;
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
+ return 0;
+
+unwind_evict:
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ amdgpu_fence_driver_hw_init(adev);
+
+unwind_userq:
+ rec = amdgpu_userq_resume(adev);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
+ return r;
+ }
+ rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
+ return r;
+ }
+
+unwind_ip_phase1:
+ /* suspend phase 1 = resume phase 3 */
+ rec = amdgpu_device_ip_resume_phase3(adev);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
+ return r;
+ }
+
+unwind_smartshift:
+ rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
+ return r;
+ }
+
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev));
+
+ amdgpu_ras_resume(adev);
+
+unwind_sriov:
+ if (amdgpu_sriov_vf(adev)) {
+ rec = amdgpu_virt_request_full_gpu(adev, true);
+ if (rec) {
+ dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
+ return r;
+ }
+ }
+
+ adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
+
+ return r;
+}
+
+static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
+{
+ int r;
+ unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
+
+ /* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
+ * may not work. The access could be blocked by nBIF protection as VF isn't in
+ * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
+ * so that QEMU reprograms MSIX table.
*/
- amdgpu_bo_evict_vram(adev);
+ amdgpu_restore_msix(adev);
+
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+
+ dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
+ prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
+
+ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
return 0;
}
@@ -3760,25 +5414,34 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
* amdgpu_device_resume - initiate device resume
*
* @dev: drm dev pointer
- * @fbcon : notify the fbdev of resume
+ * @notify_clients: notify in-kernel DRM clients
*
* Bring the hw back to operating state (all asics).
* Returns 0 for success or an error on failure.
* Called at driver resume.
*/
-int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
{
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
struct amdgpu_device *adev = drm_to_adev(dev);
- struct drm_crtc *crtc;
int r = 0;
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ r = amdgpu_virt_resume(adev);
+ if (r)
+ goto exit;
+ }
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
- if (amdgpu_acpi_is_s0ix_supported(adev))
- amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
+ if (adev->in_s0ix)
+ amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
/* post card */
if (amdgpu_device_need_post(adev)) {
@@ -3788,90 +5451,74 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
}
r = amdgpu_device_ip_resume(adev);
+
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
- return r;
+ goto exit;
}
- amdgpu_fence_driver_resume(adev);
+ r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (r)
+ goto exit;
+
+ r = amdgpu_userq_resume(adev);
+ if (r)
+ goto exit;
r = amdgpu_device_ip_late_init(adev);
if (r)
- return r;
+ goto exit;
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
+exit:
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_data_exchange(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
- if (!amdgpu_device_has_dc_support(adev)) {
- /* pin cursors */
- list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
- struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
- r = amdgpu_bo_reserve(aobj, true);
- if (r == 0) {
- r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
- if (r != 0)
- dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
- amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
- amdgpu_bo_unreserve(aobj);
- }
- }
- }
+ if (!r && !adev->in_runpm)
+ r = amdgpu_amdkfd_resume_process(adev);
}
- r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
+
if (r)
return r;
/* Make sure IB tests flushed */
flush_delayed_work(&adev->delayed_init_work);
- /* blat the mode back in */
- if (fbcon) {
- if (!amdgpu_device_has_dc_support(adev)) {
- /* pre DCE11 */
- drm_helper_resume_force_mode(dev);
-
- /* turn on display hw */
- drm_modeset_lock_all(dev);
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter)
- drm_helper_connector_dpms(connector,
- DRM_MODE_DPMS_ON);
- drm_connector_list_iter_end(&iter);
-
- drm_modeset_unlock_all(dev);
- }
- amdgpu_fbdev_set_suspend(adev, 0);
- }
-
- drm_kms_helper_poll_enable(dev);
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev));
amdgpu_ras_resume(adev);
- /*
- * Most of the connector probing functions try to acquire runtime pm
- * refs to ensure that the GPU is powered on when connector polling is
- * performed. Since we're calling this from a runtime PM callback,
- * trying to acquire rpm refs will cause us to deadlock.
- *
- * Since we're guaranteed to be holding the rpm lock, it's safe to
- * temporarily disable the rpm helpers so this doesn't deadlock us.
- */
+ if (adev->mode_info.num_crtc) {
+ /*
+ * Most of the connector probing functions try to acquire runtime pm
+ * refs to ensure that the GPU is powered on when connector polling is
+ * performed. Since we're calling this from a runtime PM callback,
+ * trying to acquire rpm refs will cause us to deadlock.
+ *
+ * Since we're guaranteed to be holding the rpm lock, it's safe to
+ * temporarily disable the rpm helpers so this doesn't deadlock us.
+ */
#ifdef CONFIG_PM
- dev->dev->power.disable_depth++;
+ dev->dev->power.disable_depth++;
#endif
- if (!amdgpu_device_has_dc_support(adev))
- drm_helper_hpd_irq_event(dev);
- else
- drm_kms_helper_hotplug_event(dev);
+ if (!adev->dc_enabled)
+ drm_helper_hpd_irq_event(dev);
+ else
+ drm_kms_helper_hotplug_event(dev);
#ifdef CONFIG_PM
- dev->dev->power.disable_depth--;
+ dev->dev->power.disable_depth--;
#endif
+ }
+
+ amdgpu_vram_mgr_clear_reset_blocks(adev);
adev->in_suspend = false;
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
+ dev_warn(adev->dev, "smart shift update failed\n");
+
return 0;
}
@@ -3901,7 +5548,8 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->funcs->check_soft_reset)
adev->ip_blocks[i].status.hang =
- adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
+ adev->ip_blocks[i].version->funcs->check_soft_reset(
+ &adev->ip_blocks[i]);
if (adev->ip_blocks[i].status.hang) {
dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
asic_hang = true;
@@ -3930,7 +5578,7 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->pre_soft_reset) {
- r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -3992,7 +5640,7 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->soft_reset) {
- r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4021,7 +5669,7 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->post_soft_reset)
- r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4030,104 +5678,44 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_recover_vram - Recover some VRAM contents
- *
- * @adev: amdgpu_device pointer
- *
- * Restores the contents of VRAM buffers from the shadows in GTT. Used to
- * restore things like GPUVM page tables after a GPU reset where
- * the contents of VRAM might be lost.
- *
- * Returns:
- * 0 on success, negative error code on failure.
- */
-static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
-{
- struct dma_fence *fence = NULL, *next = NULL;
- struct amdgpu_bo *shadow;
- long r = 1, tmo;
-
- if (amdgpu_sriov_runtime(adev))
- tmo = msecs_to_jiffies(8000);
- else
- tmo = msecs_to_jiffies(100);
-
- dev_info(adev->dev, "recover vram bo from shadow start\n");
- mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
-
- /* No need to recover an evicted BO */
- if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
- shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
- shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
- continue;
-
- r = amdgpu_bo_restore_shadow(shadow, &next);
- if (r)
- break;
-
- if (fence) {
- tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
- fence = next;
- if (tmo == 0) {
- r = -ETIMEDOUT;
- break;
- } else if (tmo < 0) {
- r = tmo;
- break;
- }
- } else {
- fence = next;
- }
- }
- mutex_unlock(&adev->shadow_list_lock);
-
- if (fence)
- tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
-
- if (r < 0 || tmo <= 0) {
- dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
- return -EIO;
- }
-
- dev_info(adev->dev, "recover vram bo from shadow done\n");
- return 0;
-}
-
-
-/**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu_device pointer
- * @from_hypervisor: request from hypervisor
+ * @reset_context: amdgpu reset context pointer
*
* do VF FLR and reinitialize Asic
* return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
- bool from_hypervisor)
+ struct amdgpu_reset_context *reset_context)
{
int r;
+ struct amdgpu_hive_info *hive = NULL;
- if (from_hypervisor)
+ if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
+ if (!amdgpu_ras_get_fed_status(adev))
+ amdgpu_virt_ready_to_reset(adev);
+ amdgpu_virt_wait_reset(adev);
+ clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
r = amdgpu_virt_request_full_gpu(adev, true);
- else
+ } else {
r = amdgpu_virt_reset_gpu(adev);
+ }
if (r)
return r;
- amdgpu_amdkfd_pre_reset(adev);
+ amdgpu_ras_clear_err_state(adev);
+ amdgpu_irq_gpu_reset_resume_helper(adev);
+
+ /* some sw clean up VF needs to do before recover */
+ amdgpu_virt_post_reset(adev);
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
- goto error;
+ return r;
amdgpu_virt_init_data_exchange(adev);
- /* we need recover gart prior to run SMC/CP/SDMA resume */
- amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
r = amdgpu_device_fw_loading(adev);
if (r)
@@ -4136,45 +5724,64 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
if (r)
- goto error;
+ return r;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ /* Update PSP FW topology after reset */
+ if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(hive, adev);
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+ if (r)
+ return r;
- amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
- amdgpu_amdkfd_post_reset(adev);
+ if (r)
+ return r;
-error:
- amdgpu_virt_release_full_gpu(adev, true);
- if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+ if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
amdgpu_inc_vram_lost(adev);
- r = amdgpu_device_recover_vram(adev);
- }
- return r;
+ /* need to be called during full access so we can't do it later like
+ * bare-metal does.
+ */
+ amdgpu_amdkfd_post_reset(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
+ amdgpu_ras_resume(adev);
+
+ amdgpu_virt_ras_telemetry_post_reset(adev);
+
+ return 0;
}
/**
- * amdgpu_device_has_job_running - check if there is any job in mirror list
+ * amdgpu_device_has_job_running - check if there is any unfinished job
*
* @adev: amdgpu_device pointer
*
- * check if there is any job in mirror list
+ * check if there is any job running on the device when guest driver receives
+ * FLR notification from host driver. If there are still jobs running, then
+ * the guest driver will not respond the FLR reset. Instead, let the job hit
+ * the timeout and guest driver then issue the reset request.
*/
bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
{
int i;
- struct drm_sched_job *job;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- spin_lock(&ring->sched.job_list_lock);
- job = list_first_entry_or_null(&ring->sched.pending_list,
- struct drm_sched_job, list);
- spin_unlock(&ring->sched.job_list_lock);
- if (job)
+ if (amdgpu_fence_count_emitted(ring))
return true;
}
return false;
@@ -4190,43 +5797,37 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
*/
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
- if (!amdgpu_device_ip_check_soft_reset(adev)) {
- dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
- return false;
- }
if (amdgpu_gpu_recovery == 0)
goto disabled;
+ /* Skip soft reset check in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(adev))
+ return true;
+
if (amdgpu_sriov_vf(adev))
return true;
if (amdgpu_gpu_recovery == -1) {
switch (adev->asic_type) {
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_VEGA20:
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- break;
- default:
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_CYAN_SKILLFISH:
goto disabled;
+ default:
+ break;
}
}
@@ -4237,42 +5838,135 @@ disabled:
return false;
}
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ dev_info(adev->dev, "GPU mode1 reset\n");
+
+ /* Cache the state before bus master disable. The saved config space
+ * values are used in other cases like restore after mode-2 reset.
+ */
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
+
+ if (ret)
+ goto mode1_reset_failed;
+
+ amdgpu_device_load_pci_state(adev->pdev);
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto mode1_reset_failed;
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ ret = -ETIMEDOUT;
+ goto mode1_reset_failed;
+ }
+
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return 0;
+
+mode1_reset_failed:
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+ return ret;
+}
+
+int amdgpu_device_link_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ dev_info(adev->dev, "GPU link reset\n");
-static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
- struct amdgpu_job *job,
- bool *need_full_reset_arg)
+ if (!amdgpu_reset_in_dpc(adev))
+ ret = amdgpu_dpm_link_reset(adev);
+
+ if (ret)
+ goto link_reset_failed;
+
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto link_reset_failed;
+
+ return 0;
+
+link_reset_failed:
+ dev_err(adev->dev, "GPU link reset failed\n");
+ return ret;
+}
+
+int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
{
int i, r = 0;
- bool need_full_reset = *need_full_reset_arg;
+ struct amdgpu_job *job = NULL;
+ struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
+ bool need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
- amdgpu_debugfs_wait_dump(adev);
+ if (reset_context->reset_req_dev == adev)
+ job = reset_context->job;
- if (amdgpu_sriov_vf(adev)) {
- /* stop the data exchange thread */
- amdgpu_virt_fini_data_exchange(adev);
- }
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_pre_reset(adev);
+
+ amdgpu_fence_driver_isr_toggle(adev, true);
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}
- if(job)
+ amdgpu_fence_driver_isr_toggle(adev, false);
+
+ if (job && job->vm)
drm_sched_increase_karma(&job->base);
+ r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
if (!amdgpu_sriov_vf(adev)) {
if (!need_full_reset)
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
- if (!need_full_reset) {
+ if (!need_full_reset && amdgpu_gpu_recovery &&
+ amdgpu_device_ip_check_soft_reset(adev)) {
amdgpu_device_ip_pre_soft_reset(adev);
r = amdgpu_device_ip_soft_reset(adev);
amdgpu_device_ip_post_soft_reset(adev);
@@ -4282,98 +5976,100 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
}
}
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
+ dev_info(tmp_adev->dev, "Dumping IP State\n");
+ /* Trigger ip dump before we reset the asic */
+ for (i = 0; i < tmp_adev->num_ip_blocks; i++)
+ if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
+ tmp_adev->ip_blocks[i].version->funcs
+ ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
+ dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
+ }
+
if (need_full_reset)
r = amdgpu_device_ip_suspend(adev);
-
- *need_full_reset_arg = need_full_reset;
+ if (need_full_reset)
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ else
+ clear_bit(AMDGPU_NEED_FULL_RESET,
+ &reset_context->flags);
}
return r;
}
-static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
- struct list_head *device_list_handle,
- bool *need_full_reset_arg,
- bool skip_hw_reset)
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_device *tmp_adev = NULL;
- bool need_full_reset = *need_full_reset_arg, vram_lost = false;
- int r = 0;
+ struct list_head *device_list_handle;
+ bool full_reset, vram_lost = false;
+ struct amdgpu_device *tmp_adev;
+ int r, init_level;
- /*
- * ASIC reset has to be done on all HGMI hive nodes ASAP
- * to allow proper links negotiation in FW (within 1 sec)
- */
- if (!skip_hw_reset && need_full_reset) {
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- /* For XGMI run all resets in parallel to speed up the process */
- if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
- r = -EALREADY;
- } else
- r = amdgpu_asic_reset(tmp_adev);
-
- if (r) {
- dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
- r, adev_to_drm(tmp_adev)->unique);
- break;
- }
- }
+ device_list_handle = reset_context->reset_device_list;
- /* For XGMI wait for all resets to complete before proceed */
- if (!r) {
- list_for_each_entry(tmp_adev, device_list_handle,
- gmc.xgmi.head) {
- if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- flush_work(&tmp_adev->xgmi_reset_work);
- r = tmp_adev->asic_reset_res;
- if (r)
- break;
- }
- }
- }
- }
+ if (!device_list_handle)
+ return -EINVAL;
- if (!r && amdgpu_ras_intr_triggered()) {
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- if (tmp_adev->mmhub.funcs &&
- tmp_adev->mmhub.funcs->reset_ras_error_count)
- tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
- }
+ full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
- amdgpu_ras_intr_cleared();
- }
+ /**
+ * If it's reset on init, it's default init level, otherwise keep level
+ * as recovery level.
+ */
+ if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
+ init_level = AMDGPU_INIT_LEVEL_DEFAULT;
+ else
+ init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- if (need_full_reset) {
+ r = 0;
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ amdgpu_set_init_level(tmp_adev, init_level);
+ if (full_reset) {
/* post card */
- if (amdgpu_device_asic_init(tmp_adev))
+ amdgpu_reset_set_dpc_status(tmp_adev, false);
+ amdgpu_ras_clear_err_state(tmp_adev);
+ r = amdgpu_device_asic_init(tmp_adev);
+ if (r) {
dev_warn(tmp_adev->dev, "asic atom init failed!");
-
- if (!r) {
+ } else {
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
+
r = amdgpu_device_ip_resume_phase1(tmp_adev);
if (r)
goto out;
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
+
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
+ amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
+
if (vram_lost) {
- DRM_INFO("VRAM is lost due to GPU reset!\n");
+ dev_info(
+ tmp_adev->dev,
+ "VRAM is lost due to GPU reset!\n");
amdgpu_inc_vram_lost(tmp_adev);
}
- r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
- if (r)
- goto out;
-
r = amdgpu_device_fw_loading(tmp_adev);
if (r)
return r;
+ r = amdgpu_xcp_restore_partition_mode(
+ tmp_adev->xcp_mgr);
+ if (r)
+ goto out;
+
r = amdgpu_device_ip_resume_phase2(tmp_adev);
if (r)
goto out;
+ if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+
+ r = amdgpu_device_ip_resume_phase3(tmp_adev);
+ if (r)
+ goto out;
+
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
@@ -4383,11 +6079,19 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
*/
amdgpu_register_gpu_instance(tmp_adev);
+ if (!reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ amdgpu_xgmi_add_device(tmp_adev);
+
r = amdgpu_device_ip_late_init(tmp_adev);
if (r)
goto out;
- amdgpu_fbdev_set_suspend(tmp_adev, 0);
+ r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
+ if (r)
+ goto out;
+
+ drm_client_dev_resume(adev_to_drm(tmp_adev));
/*
* The GPU enters bad state once faulty pages
@@ -4399,7 +6103,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
* bad_page_threshold value to fix this once
* probing driver again.
*/
- if (!amdgpu_ras_check_err_threshold(tmp_adev)) {
+ if (!amdgpu_ras_is_rma(tmp_adev)) {
/* must succeed. */
amdgpu_ras_resume(tmp_adev);
} else {
@@ -4408,50 +6112,120 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
}
/* Update PSP FW topology after reset */
- if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
- r = amdgpu_xgmi_update_topology(hive, tmp_adev);
+ if (reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(
+ reset_context->hive, tmp_adev);
}
}
out:
if (!r) {
+ /* IP init is complete now, set level as default */
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
- r = amdgpu_device_ip_suspend(tmp_adev);
- need_full_reset = true;
r = -EAGAIN;
goto end;
}
}
- if (!r)
- r = amdgpu_device_recover_vram(tmp_adev);
- else
+ if (r)
tmp_adev->asic_reset_res = r;
}
end:
- *need_full_reset_arg = need_full_reset;
return r;
}
-static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
- struct amdgpu_hive_info *hive)
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context)
{
- if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
- return false;
+ struct amdgpu_device *tmp_adev = NULL;
+ bool need_full_reset, skip_hw_reset;
+ int r = 0;
- if (hive) {
- down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
- } else {
- down_write(&adev->reset_sem);
+ /* Try reset handler method first */
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+
+ reset_context->reset_device_list = device_list_handle;
+ r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
+ /* Reset handler not implemented, use the default method */
+ need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+
+ /*
+ * ASIC reset has to be done on all XGMI hive nodes ASAP
+ * to allow proper links negotiation in FW (within 1 sec)
+ */
+ if (!skip_hw_reset && need_full_reset) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!queue_work(system_unbound_wq,
+ &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ } else
+ r = amdgpu_asic_reset(tmp_adev);
+
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ goto out;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, device_list_handle,
+ reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+ }
+
+ if (!r && amdgpu_ras_intr_triggered()) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ amdgpu_ras_reset_error_count(tmp_adev,
+ AMDGPU_RAS_BLOCK__MMHUB);
+ }
+
+ amdgpu_ras_intr_cleared();
}
- atomic_inc(&adev->gpu_reset_counter);
+ r = amdgpu_device_reinit_after_reset(reset_context);
+ if (r == -EAGAIN)
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ else
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+out:
+ return r;
+}
+
+static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
+{
+
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
+ case AMD_RESET_METHOD_LINK:
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
break;
case AMD_RESET_METHOD_MODE2:
@@ -4461,56 +6235,12 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
adev->mp1_state = PP_MP1_STATE_NONE;
break;
}
-
- return true;
}
-static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
+static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
- atomic_set(&adev->in_gpu_reset, 0);
- up_write(&adev->reset_sem);
-}
-
-/*
- * to lockup a list of amdgpu devices in a hive safely, if not a hive
- * with multiple nodes, it will be similar as amdgpu_device_lock_adev.
- *
- * unlock won't require roll back.
- */
-static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
-{
- struct amdgpu_device *tmp_adev = NULL;
-
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- if (!hive) {
- dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
- return -ENODEV;
- }
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- if (!amdgpu_device_lock_adev(tmp_adev, hive))
- goto roll_back;
- }
- } else if (!amdgpu_device_lock_adev(adev, hive))
- return -EAGAIN;
-
- return 0;
-roll_back:
- if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
- /*
- * if the lockup iteration break in the middle of a hive,
- * it may means there may has a race issue,
- * or a hive device locked up independently.
- * we may be in trouble and may not, so will try to roll back
- * the lock and give out a warnning.
- */
- dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
- list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- amdgpu_device_unlock_adev(tmp_adev);
- }
- }
- return -EAGAIN;
}
static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
@@ -4523,6 +6253,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
pm_runtime_enable(&(p->dev));
pm_runtime_resume(&(p->dev));
}
+
+ pci_dev_put(p);
}
static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
@@ -4561,6 +6293,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
if (expires < ktime_get_mono_fast_ns()) {
dev_warn(adev->dev, "failed to suspend display audio\n");
+ pci_dev_put(p);
/* TODO: abort the succeeding gpu reset? */
return -ETIMEDOUT;
}
@@ -4568,104 +6301,107 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
pm_runtime_disable(&(p->dev));
+ pci_dev_put(p);
return 0;
}
-/**
- * amdgpu_device_gpu_recover - reset the asic and recover scheduler
- *
- * @adev: amdgpu_device pointer
- * @job: which job trigger hang
- *
- * Attempt to reset the GPU if it has hung (all asics).
- * Attempt to do soft-reset or full-reset and reinitialize Asic
- * Returns 0 for success or an error on failure.
- */
-
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job *job)
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
- struct list_head device_list, *device_list_handle = NULL;
- bool need_full_reset = false;
- bool job_signaled = false;
- struct amdgpu_hive_info *hive = NULL;
- struct amdgpu_device *tmp_adev = NULL;
- int i, r = 0;
- bool need_emergency_restart = false;
- bool audio_suspended = false;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- /*
- * Special case: RAS triggered and full reset isn't supported
- */
- need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+#if defined(CONFIG_DEBUG_FS)
+ if (!amdgpu_sriov_vf(adev))
+ cancel_work(&adev->reset_work);
+#endif
+ cancel_work(&adev->userq_reset_work);
- /*
- * Flush RAM to disk so that after reboot
- * the user can read log and see why the system rebooted.
- */
- if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
- DRM_WARN("Emergency reboot.");
+ if (adev->kfd.dev)
+ cancel_work(&adev->kfd.reset_work);
- ksys_sync_helper();
- emergency_restart();
- }
+ if (amdgpu_sriov_vf(adev))
+ cancel_work(&adev->virt.flr_work);
- dev_info(adev->dev, "GPU %s begin!\n",
- need_emergency_restart ? "jobs stop":"reset");
+ if (con && adev->ras_enabled)
+ cancel_work(&con->recovery_work);
- /*
- * Here we trylock to avoid chain of resets executing from
- * either trigger by jobs on different adevs in XGMI hive or jobs on
- * different schedulers for same device while this TO handler is running.
- * We always reset all schedulers for device and all devices for XGMI
- * hive so that should take care of them too.
- */
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive) {
- if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
- DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
- job ? job->base.id : -1, hive->hive_id);
- amdgpu_put_xgmi_hive(hive);
- if (job)
- drm_sched_increase_karma(&job->base);
- return 0;
- }
- mutex_lock(&hive->hive_lock);
- }
+}
- /*
- * lock the device before we try to operate the linked list
- * if didn't get the device lock, don't touch the linked list since
- * others may iterating it.
- */
- r = amdgpu_device_lock_hive_adev(adev, hive);
- if (r) {
- dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
- job ? job->base.id : -1);
+static int amdgpu_device_health_check(struct list_head *device_list_handle)
+{
+ struct amdgpu_device *tmp_adev;
+ int ret = 0;
- /* even we skipped this reset, still need to set the job to guilty */
- if (job)
- drm_sched_increase_karma(&job->base);
- goto skip_recovery;
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ ret |= amdgpu_device_bus_status_check(tmp_adev);
}
+ return ret;
+}
+
+static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
/*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
* to put adev in the 1st position.
*/
- INIT_LIST_HEAD(&device_list);
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
- list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
- device_list_handle = &hive->device_list;
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ list_add_tail(&tmp_adev->reset_list, device_list);
+ if (adev->shutdown)
+ tmp_adev->shutdown = true;
+ if (amdgpu_reset_in_dpc(adev))
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
+ }
+ if (!list_is_first(&adev->reset_list, device_list))
+ list_rotate_to_front(&adev->reset_list, device_list);
} else {
- list_add_tail(&adev->gmc.xgmi.head, &device_list);
- device_list_handle = &device_list;
+ list_add_tail(&adev->reset_list, device_list);
}
+}
+
+static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+}
+
+static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+}
+
+static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i;
/* block all schedulers and reset given job's ring */
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ amdgpu_device_set_mp1_state(tmp_adev);
+
/*
* Try to put the audio codec into suspend state
* before gpu reset started.
@@ -4677,32 +6413,33 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* some audio codec errors.
*/
if (!amdgpu_device_suspend_display_audio(tmp_adev))
- audio_suspended = true;
+ tmp_adev->pcie_reset_ctx.audio_suspended = true;
amdgpu_ras_set_error_query_ready(tmp_adev, false);
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
- if (!amdgpu_sriov_vf(tmp_adev))
- amdgpu_amdkfd_pre_reset(tmp_adev);
+ amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
/*
- * Mark these ASICs to be reseted as untracked first
+ * Mark these ASICs to be reset as untracked first
* And add them back after reset completed
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- amdgpu_fbdev_set_suspend(tmp_adev, 1);
+ drm_client_dev_suspend(adev_to_drm(tmp_adev));
/* disable ras on ALL IPs */
- if (!need_emergency_restart &&
- amdgpu_device_ip_need_full_reset(tmp_adev))
+ if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
+ amdgpu_userq_pre_reset(tmp_adev);
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -4710,29 +6447,21 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (need_emergency_restart)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
+ atomic_inc(&tmp_adev->gpu_reset_counter);
}
+}
- if (need_emergency_restart)
- goto skip_sched_resume;
-
- /*
- * Must check guilty signal here since after this point all old
- * HW fences are force signaled.
- *
- * job->base holds a reference to parent fence
- */
- if (job && job->base.s_fence->parent &&
- dma_fence_is_signaled(job->base.s_fence->parent)) {
- job_signaled = true;
- dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
- goto skip_hw_reset;
- }
+static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
+ int r = 0;
retry: /* Rest of adevs pre asic reset from XGMI hive. */
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- r = amdgpu_device_pre_asic_reset(tmp_adev,
- (tmp_adev == adev) ? job : NULL,
- &need_full_reset);
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
/*TODO Should we stop ?*/
if (r) {
dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
@@ -4742,86 +6471,349 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
}
/* Actual ASIC resets if needed.*/
- /* TODO Implement XGMI hive reset logic for SRIOV */
+ /* Host driver will handle XGMI hive reset for SRIOV */
if (amdgpu_sriov_vf(adev)) {
- r = amdgpu_device_reset_sriov(adev, job ? false : true);
+
+ /* Bail out of reset early */
+ if (amdgpu_ras_is_rma(adev))
+ return -ENODEV;
+
+ if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
+ dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
+ amdgpu_ras_set_fed(adev, true);
+ set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
+ }
+
+ r = amdgpu_device_reset_sriov(adev, reset_context);
+ if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
+ amdgpu_virt_release_full_gpu(adev, true);
+ goto retry;
+ }
if (r)
adev->asic_reset_res = r;
} else {
- r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
+ r = amdgpu_do_asic_reset(device_list, reset_context);
if (r && r == -EAGAIN)
goto retry;
}
-skip_hw_reset:
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ /*
+ * Drop any pending non scheduler resets queued before reset is done.
+ * Any reset scheduled after this point would be valid. Scheduler resets
+ * were already dropped during drm_sched_stop and no new ones can come
+ * in before drm_sched_start.
+ */
+ amdgpu_device_stop_pending_resets(tmp_adev);
+ }
+
+ return r;
+}
+
+static int amdgpu_device_sched_resume(struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context,
+ bool job_signaled)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i, r = 0;
/* Post ASIC reset for all devs .*/
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- /* No point to resubmit jobs if we didn't HW reset*/
- if (!tmp_adev->asic_reset_res && !job_signaled)
- drm_sched_resubmit_jobs(&ring->sched);
-
- drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+ drm_sched_start(&ring->sched, 0);
}
- if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
- }
- tmp_adev->asic_reset_res = 0;
-
- if (r) {
- /* bad news, how to tell it to userspace ? */
- dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
- amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+ if (tmp_adev->asic_reset_res) {
+ /* bad news, how to tell it to userspace ?
+ * for ras error, we should report GPU bad status instead of
+ * reset failure
+ */
+ if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
+ !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
+ dev_info(
+ tmp_adev->dev,
+ "GPU reset(%d) failed with error %d \n",
+ atomic_read(
+ &tmp_adev->gpu_reset_counter),
+ tmp_adev->asic_reset_res);
+ amdgpu_vf_error_put(tmp_adev,
+ AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
+ tmp_adev->asic_reset_res);
+ if (!r)
+ r = tmp_adev->asic_reset_res;
+ tmp_adev->asic_reset_res = 0;
} else {
- dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
+ atomic_read(&tmp_adev->gpu_reset_counter));
+ if (amdgpu_acpi_smart_shift_update(tmp_adev,
+ AMDGPU_SS_DEV_D0))
+ dev_warn(tmp_adev->dev,
+ "smart shift update failed\n");
}
}
-skip_sched_resume:
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- /*unlock kfd: SRIOV would do it separately */
+ return r;
+}
+
+static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ /* unlock kfd: SRIOV would do it separately */
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
- amdgpu_amdkfd_post_reset(tmp_adev);
- if (audio_suspended)
+ amdgpu_amdkfd_post_reset(tmp_adev);
+
+ /* kfd_post_reset will do nothing if kfd device is not initialized,
+ * need to bring up kfd here if it's not be initialized before
+ */
+ if (!adev->kfd.init_complete)
+ amdgpu_amdkfd_device_init(adev);
+
+ if (tmp_adev->pcie_reset_ctx.audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
- amdgpu_device_unlock_adev(tmp_adev);
+
+ amdgpu_device_unset_mp1_state(tmp_adev);
+
+ amdgpu_ras_set_error_query_ready(tmp_adev, true);
+
+ }
+}
+
+
+/**
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
+ *
+ * @adev: amdgpu_device pointer
+ * @job: which job trigger hang
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Attempt to reset the GPU if it has hung (all asics).
+ * Attempt to do soft-reset or full-reset and reinitialize Asic
+ * Returns 0 for success or an error on failure.
+ */
+
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head device_list;
+ bool job_signaled = false;
+ struct amdgpu_hive_info *hive = NULL;
+ int r = 0;
+ bool need_emergency_restart = false;
+
+ /*
+ * If it reaches here because of hang/timeout and a RAS error is
+ * detected at the same time, let RAS recovery take care of it.
+ */
+ if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
+ !amdgpu_sriov_vf(adev) &&
+ reset_context->src != AMDGPU_RESET_SRC_RAS) {
+ dev_dbg(adev->dev,
+ "Gpu recovery from source: %d yielding to RAS error recovery handling",
+ reset_context->src);
+ return 0;
}
-skip_recovery:
+ /*
+ * Special case: RAS triggered and full reset isn't supported
+ */
+ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
+ /*
+ * Flush RAM to disk so that after reboot
+ * the user can read log and see why the system rebooted.
+ */
+ if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
+ amdgpu_ras_get_context(adev)->reboot) {
+ dev_warn(adev->dev, "Emergency reboot.");
+
+ ksys_sync_helper();
+ emergency_restart();
+ }
+
+ dev_info(adev->dev, "GPU %s begin!. Source: %d\n",
+ need_emergency_restart ? "jobs stop" : "reset",
+ reset_context->src);
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+
+ reset_context->job = job;
+ reset_context->hive = hive;
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ r = amdgpu_device_health_check(&device_list);
+ if (r)
+ goto end_reset;
+ }
+
+ /* Cannot be called after locking reset domain */
+ amdgpu_ras_pre_reset(adev, &device_list);
+
+ /* We need to lock reset domain only once both for XGMI and single device */
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+
+ amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
+ hive, need_emergency_restart);
+ if (need_emergency_restart)
+ goto skip_sched_resume;
+ /*
+ * Must check guilty signal here since after this point all old
+ * HW fences are force signaled.
+ *
+ * job->base holds a reference to parent fence
+ */
+ if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
+ job_signaled = true;
+ dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+ goto skip_hw_reset;
+ }
+
+ r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
+ if (r)
+ goto reset_unlock;
+skip_hw_reset:
+ r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
+ if (r)
+ goto reset_unlock;
+skip_sched_resume:
+ amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
+reset_unlock:
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ amdgpu_ras_post_reset(adev, &device_list);
+end_reset:
if (hive) {
- atomic_set(&hive->in_reset, 0);
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
}
- if (r && r != -EAGAIN)
+ if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
+
+ atomic_set(&adev->reset_domain->reset_res, r);
+
+ if (!r) {
+ struct amdgpu_task_info *ti = NULL;
+
+ if (job)
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
+ ti ? &ti->task : NULL);
+
+ amdgpu_vm_put_task_info(ti);
+ }
+
return r;
}
/**
+ * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * first physical partner to an AMD dGPU.
+ * This will exclude any virtual switches and links.
+ */
+static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *parent = adev->pdev;
+
+ if (!speed || !width)
+ return;
+
+ *speed = PCI_SPEED_UNKNOWN;
+ *width = PCIE_LNK_WIDTH_UNKNOWN;
+
+ if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
+ while ((parent = pci_upstream_bridge(parent))) {
+ /* skip upstream/downstream switches internal to dGPU*/
+ if (parent->vendor == PCI_VENDOR_ID_ATI)
+ continue;
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ break;
+ }
+ } else {
+ /* use the current speeds rather than max if switching is not supported */
+ pcie_bandwidth_available(adev->pdev, NULL, speed, width);
+ }
+}
+
+/**
+ * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * AMD dGPU which may be a virtual upstream bridge.
+ */
+static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *parent = adev->pdev;
+
+ if (!speed || !width)
+ return;
+
+ parent = pci_upstream_bridge(parent);
+ if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
+ /* use the upstream/downstream switches internal to dGPU */
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ while ((parent = pci_upstream_bridge(parent))) {
+ if (parent->vendor == PCI_VENDOR_ID_ATI) {
+ /* use the upstream/downstream switches internal to dGPU */
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ }
+ }
+ } else {
+ /* use the device itself */
+ *speed = pcie_get_speed_cap(adev->pdev);
+ *width = pcie_get_width_cap(adev->pdev);
+ }
+}
+
+/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
* @adev: amdgpu_device pointer
*
- * Fetchs and stores in the driver the PCIE capabilities (gen speed
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
* and lanes) of the slot the device is in. Handles APUs and
* virtualized environments where PCIE config space may not be available.
*/
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
- struct pci_dev *pdev;
enum pci_bus_speed speed_cap, platform_speed_cap;
- enum pcie_link_width platform_link_width;
+ enum pcie_link_width platform_link_width, link_width;
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -4830,7 +6822,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
/* covers APUs as well */
- if (pci_is_root_bus(adev->pdev->bus)) {
+ if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
if (adev->pm.pcie_gen_mask == 0)
adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
if (adev->pm.pcie_mlw_mask == 0)
@@ -4841,13 +6833,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
- pcie_bandwidth_available(adev->pdev, NULL,
- &platform_speed_cap, &platform_link_width);
+ amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
+ &platform_link_width);
+ amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
- pdev = adev->pdev;
- speed_cap = pcie_get_speed_cap(pdev);
if (speed_cap == PCI_SPEED_UNKNOWN) {
adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
@@ -4903,51 +6894,103 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
if (adev->pm.pcie_mlw_mask == 0) {
+ /* asic caps */
+ if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
+ } else {
+ switch (link_width) {
+ case PCIE_LNK_X32:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X16:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X12:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X8:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X4:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X2:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X1:
+ adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
+ break;
+ default:
+ break;
+ }
+ }
+ /* platform caps */
if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
} else {
switch (platform_link_width) {
case PCIE_LNK_X32:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X16:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X12:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X8:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X4:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X2:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X1:
- adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+ adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
break;
default:
break;
@@ -4956,51 +6999,81 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
-int amdgpu_device_baco_enter(struct drm_device *dev)
+/**
+ * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
+ *
+ * Return true if @peer_adev can access (DMA) @adev through the PCIe
+ * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
+ * @peer_adev.
+ */
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+#ifdef CONFIG_HSA_AMD_P2P
+ bool p2p_access =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
+ if (!p2p_access)
+ dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
+ pci_name(peer_adev->pdev));
+
+ bool is_large_bar = adev->gmc.visible_vram_size &&
+ adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
+ bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
+
+ if (!p2p_addressable) {
+ uint64_t address_mask = peer_adev->dev->dma_mask ?
+ ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
+ resource_size_t aper_limit =
+ adev->gmc.aper_base + adev->gmc.aper_size - 1;
+
+ p2p_addressable = !(adev->gmc.aper_base & address_mask ||
+ aper_limit & address_mask);
+ }
+ return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
+#else
+ return false;
+#endif
+}
+
+int amdgpu_device_baco_enter(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
+ if (!amdgpu_device_supports_baco(adev))
return -ENOTSUPP;
- if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
+ if (ras && adev->ras_enabled &&
+ adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
return amdgpu_dpm_baco_enter(adev);
}
-int amdgpu_device_baco_exit(struct drm_device *dev)
+int amdgpu_device_baco_exit(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int ret = 0;
- if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
+ if (!amdgpu_device_supports_baco(adev))
return -ENOTSUPP;
ret = amdgpu_dpm_baco_exit(adev);
if (ret)
return ret;
- if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
+ if (ras && adev->ras_enabled &&
+ adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
- return 0;
-}
-
-static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
+ if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
+ adev->nbio.funcs->clear_doorbell_interrupt)
+ adev->nbio.funcs->clear_doorbell_interrupt(adev);
- if (!ring || !ring->sched.thread)
- continue;
-
- cancel_delayed_work_sync(&ring->sched.work_tdr);
- }
+ return 0;
}
/**
@@ -5016,45 +7089,52 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int i;
+ struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
+ amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_reset_context reset_context;
+ struct list_head device_list;
- DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
+ dev_info(adev->dev, "PCI error: detected callback!!\n");
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- DRM_WARN("No support for XGMI hive yet...");
- return PCI_ERS_RESULT_DISCONNECT;
- }
+ adev->pci_channel_state = state;
switch (state) {
case pci_channel_io_normal:
+ dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
return PCI_ERS_RESULT_CAN_RECOVER;
- /* Fatal error, prepare for slot reset */
case pci_channel_io_frozen:
- /*
- * Cancel and wait for all TDRs in progress if failing to
- * set adev->in_gpu_reset in amdgpu_device_lock_adev
- *
- * Locking adev->reset_sem will prevent any external access
- * to GPU during PCI error recovery
- */
- while (!amdgpu_device_lock_adev(adev, NULL))
- amdgpu_cancel_all_tdr(adev);
-
- /*
- * Block any work scheduling as we do for regular GPU reset
- * for the duration of the recovery
- */
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
+ /* Fatal error, prepare for slot reset */
+ dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
+ if (hive) {
+ /* Hive devices should be able to support FW based
+ * link reset on other devices, if not return.
+ */
+ if (!amdgpu_dpm_is_link_reset_supported(adev)) {
+ dev_warn(adev->dev,
+ "No support for XGMI hive yet...\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ /* Set dpc status only if device is part of hive
+ * Non-hive devices should be able to recover after
+ * link reset.
+ */
+ amdgpu_reset_set_dpc_status(adev, true);
- drm_sched_stop(&ring->sched, NULL);
+ mutex_lock(&hive->hive_lock);
}
+ memset(&reset_context, 0, sizeof(reset_context));
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+ amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
+ hive, false);
+ if (hive)
+ mutex_unlock(&hive->hive_lock);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
/* Permanent error, prepare for device removal */
+ dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
return PCI_ERS_RESULT_DISCONNECT;
}
@@ -5067,8 +7147,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
*/
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
- DRM_INFO("PCI error: mmio enabled callback!!\n");
+ dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
/* TODO - dump whatever for debugging purposes */
@@ -5092,19 +7174,38 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int r, i;
- bool need_full_reset = true;
- u32 memsize;
+ struct amdgpu_reset_context reset_context;
+ struct amdgpu_device *tmp_adev;
+ struct amdgpu_hive_info *hive;
struct list_head device_list;
+ struct pci_dev *link_dev;
+ int r = 0, i, timeout;
+ u32 memsize;
+ u16 status;
- DRM_INFO("PCI error: slot reset callback!!\n");
+ dev_info(adev->dev, "PCI error: slot reset callback!!\n");
- INIT_LIST_HEAD(&device_list);
- list_add_tail(&adev->gmc.xgmi.head, &device_list);
+ memset(&reset_context, 0, sizeof(reset_context));
- /* wait for asic to come out of reset */
- msleep(500);
+ if (adev->pcie_reset_ctx.swus)
+ link_dev = adev->pcie_reset_ctx.swus;
+ else
+ link_dev = adev->pdev;
+ /* wait for asic to come out of reset, timeout = 10s */
+ timeout = 10000;
+ do {
+ usleep_range(10000, 10500);
+ r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
+ timeout -= 10;
+ } while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
+ (status != PCI_VENDOR_ID_AMD));
+
+ if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
+ r = -ETIME;
+ goto out;
+ }
+ amdgpu_device_load_switch_state(adev);
/* Restore PCI confspace */
amdgpu_device_load_pci_state(pdev);
@@ -5121,23 +7222,43 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
goto out;
}
- adev->in_pci_err_recovery = true;
- r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
- adev->in_pci_err_recovery = false;
- if (r)
- goto out;
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+ INIT_LIST_HEAD(&device_list);
- r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ reset_context.hive = hive;
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else {
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ list_add_tail(&adev->reset_list, &device_list);
+ }
+ r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
out:
if (!r) {
if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(adev->pdev);
-
- DRM_INFO("PCIe error recovery succeeded\n");
+ dev_info(adev->dev, "PCIe error recovery succeeded\n");
} else {
- DRM_ERROR("PCIe error recovery failed, err:%d", r);
- amdgpu_device_unlock_adev(adev);
+ dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
+ if (hive) {
+ list_for_each_entry(tmp_adev, &device_list, reset_list)
+ amdgpu_device_unset_mp1_state(tmp_adev);
+ }
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ }
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
}
return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
@@ -5154,23 +7275,95 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int i;
+ struct list_head device_list;
+ struct amdgpu_hive_info *hive = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
+ dev_info(adev->dev, "PCI error: resume callback!!\n");
- DRM_INFO("PCI error: resume callback!!\n");
+ /* Only continue execution for the case of pci_channel_io_frozen */
+ if (adev->pci_channel_state != pci_channel_io_frozen)
+ return;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
+ INIT_LIST_HEAD(&device_list);
- if (!ring || !ring->sched.thread)
- continue;
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = false;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else
+ list_add_tail(&adev->reset_list, &device_list);
+
+ amdgpu_device_sched_resume(&device_list, NULL, NULL);
+ amdgpu_device_gpu_resume(adev, &device_list, false);
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+}
+
+static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
+{
+ struct pci_dev *swus, *swds;
+ int r;
+
+ swds = pci_upstream_bridge(adev->pdev);
+ if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
+ pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
+ return;
+ swus = pci_upstream_bridge(swds);
+ if (!swus ||
+ (swus->vendor != PCI_VENDOR_ID_ATI &&
+ swus->vendor != PCI_VENDOR_ID_AMD) ||
+ pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
+ return;
+
+ /* If already saved, return */
+ if (adev->pcie_reset_ctx.swus)
+ return;
+ /* Upstream bridge is ATI, assume it's SWUS/DS architecture */
+ r = pci_save_state(swds);
+ if (r)
+ return;
+ adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
+
+ r = pci_save_state(swus);
+ if (r)
+ return;
+ adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
+
+ adev->pcie_reset_ctx.swus = swus;
+}
+
+static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev;
+ int r;
+ if (!adev->pcie_reset_ctx.swds_pcistate ||
+ !adev->pcie_reset_ctx.swus_pcistate)
+ return;
- drm_sched_resubmit_jobs(&ring->sched);
- drm_sched_start(&ring->sched, true);
+ pdev = adev->pcie_reset_ctx.swus;
+ r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
+ if (!r) {
+ pci_restore_state(pdev);
+ } else {
+ dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
+ return;
}
- amdgpu_device_unlock_adev(adev);
+ pdev = pci_upstream_bridge(adev->pdev);
+ r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
+ if (!r)
+ pci_restore_state(pdev);
+ else
+ dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
}
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
@@ -5179,6 +7372,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
struct amdgpu_device *adev = drm_to_adev(dev);
int r;
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
r = pci_save_state(pdev);
if (!r) {
kfree(adev->pci_state);
@@ -5186,14 +7382,16 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
adev->pci_state = pci_store_saved_state(pdev);
if (!adev->pci_state) {
- DRM_ERROR("Failed to store PCI saved state");
+ dev_err(adev->dev, "Failed to store PCI saved state");
return false;
}
} else {
- DRM_WARN("Failed to save PCI state, err:%d\n", r);
+ dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
return false;
}
+ amdgpu_device_cache_switch_state(adev);
+
return true;
}
@@ -5211,11 +7409,429 @@ bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
if (!r) {
pci_restore_state(pdev);
} else {
- DRM_WARN("Failed to load PCI state, err:%d\n", r);
+ dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
return false;
}
return true;
}
+void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
+ return;
+#endif
+ if (adev->gmc.xgmi.connected_to_cpu)
+ return;
+
+ if (ring && ring->funcs->emit_hdp_flush) {
+ amdgpu_ring_emit_hdp_flush(ring);
+ return;
+ }
+
+ if (!ring && amdgpu_sriov_runtime(adev)) {
+ if (!amdgpu_kiq_hdp_flush(adev))
+ return;
+ }
+
+ amdgpu_hdp_flush(adev, ring);
+}
+
+void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
+ return;
+#endif
+ if (adev->gmc.xgmi.connected_to_cpu)
+ return;
+
+ amdgpu_hdp_invalidate(adev, ring);
+}
+
+int amdgpu_in_reset(struct amdgpu_device *adev)
+{
+ return atomic_read(&adev->reset_domain->in_gpu_reset);
+}
+
+/**
+ * amdgpu_device_halt() - bring hardware to some kind of halt state
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Bring hardware to some kind of halt state so that no one can touch it
+ * any more. It will help to maintain error context when error occurred.
+ * Compare to a simple hang, the system will keep stable at least for SSH
+ * access. Then it should be trivial to inspect the hardware state and
+ * see what's going on. Implemented as following:
+ *
+ * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
+ * clears all CPU mappings to device, disallows remappings through page faults
+ * 2. amdgpu_irq_disable_all() disables all interrupts
+ * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
+ * 4. set adev->no_hw_access to avoid potential crashes after setp 5
+ * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
+ * 6. pci_disable_device() and pci_wait_for_pending_transaction()
+ * flush any in flight DMA operations
+ */
+void amdgpu_device_halt(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev = adev->pdev;
+ struct drm_device *ddev = adev_to_drm(adev);
+
+ amdgpu_xcp_dev_unplug(adev);
+ drm_dev_unplug(ddev);
+
+ amdgpu_irq_disable_all(adev);
+
+ amdgpu_fence_driver_hw_fini(adev);
+
+ adev->no_hw_access = true;
+
+ amdgpu_device_unmap_mmio(adev);
+
+ pci_disable_device(pdev);
+ pci_wait_for_pending_transaction(pdev);
+}
+
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
+ u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
+ u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ WREG32(data, v);
+ (void)RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_get_gang - return a reference to the current gang
+ * @adev: amdgpu_device pointer
+ *
+ * Returns: A new reference to the current gang leader.
+ */
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
+{
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = dma_fence_get_rcu_safe(&adev->gang_submit);
+ rcu_read_unlock();
+ return fence;
+}
+
+/**
+ * amdgpu_device_switch_gang - switch to a new gang
+ * @adev: amdgpu_device pointer
+ * @gang: the gang to switch to
+ *
+ * Try to switch to a new gang.
+ * Returns: NULL if we switched to the new gang or a reference to the current
+ * gang leader.
+ */
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang)
+{
+ struct dma_fence *old = NULL;
+
+ dma_fence_get(gang);
+ do {
+ dma_fence_put(old);
+ old = amdgpu_device_get_gang(adev);
+ if (old == gang)
+ break;
+
+ if (!dma_fence_is_signaled(old)) {
+ dma_fence_put(gang);
+ return old;
+ }
+
+ } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
+ old, gang) != old);
+
+ /*
+ * Drop it once for the exchanged reference in adev and once for the
+ * thread local reference acquired in amdgpu_device_get_gang().
+ */
+ dma_fence_put(old);
+ dma_fence_put(old);
+ return NULL;
+}
+
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ struct drm_sched_fence *f = job->base.s_fence;
+ struct dma_fence *dep;
+ void *owner;
+ int r;
+
+ /*
+ * For now enforce isolation only for the GFX block since we only need
+ * the cleaner shader on those rings.
+ */
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+ ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+ return NULL;
+
+ /*
+ * All submissions where enforce isolation is false are handled as if
+ * they come from a single client. Use ~0l as the owner to distinct it
+ * from kernel submissions where the owner is NULL.
+ */
+ owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+
+ /*
+ * The "spearhead" submission is the first one which changes the
+ * ownership to its client. We always need to wait for it to be
+ * pushed to the HW before proceeding with anything.
+ */
+ if (&f->scheduled != isolation->spearhead &&
+ !dma_fence_is_signaled(isolation->spearhead)) {
+ dep = isolation->spearhead;
+ goto out_grab_ref;
+ }
+
+ if (isolation->owner != owner) {
+
+ /*
+ * Wait for any gang to be assembled before switching to a
+ * different owner or otherwise we could deadlock the
+ * submissions.
+ */
+ if (!job->gang_submit) {
+ dep = amdgpu_device_get_gang(adev);
+ if (!dma_fence_is_signaled(dep))
+ goto out_return_dep;
+ dma_fence_put(dep);
+ }
+
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(&f->scheduled);
+ amdgpu_sync_move(&isolation->active, &isolation->prev);
+ trace_amdgpu_isolation(isolation->owner, owner);
+ isolation->owner = owner;
+ }
+
+ /*
+ * Specifying the ring here helps to pipeline submissions even when
+ * isolation is enabled. If that is not desired for testing NULL can be
+ * used instead of the ring to enforce a CPU round trip while switching
+ * between clients.
+ */
+ dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+ r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+ if (r)
+ dev_warn(adev->dev, "OOM tracking isolation\n");
+
+out_grab_ref:
+ dma_fence_get(dep);
+out_return_dep:
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ return dep;
+}
+
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ case CHIP_TOPAZ:
+ /* chips with no display hardware */
+ return false;
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ /* chips with display hardware */
+ return true;
+ default:
+ /* IP discovery */
+ if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
+ (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
+ return false;
+ return true;
+ }
+}
+
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask)
+{
+ uint32_t ret = 0;
+ uint32_t old_ = 0;
+ uint32_t tmp_ = RREG32(reg_addr);
+ uint32_t loop = adev->usec_timeout;
+
+ while ((tmp_ & (mask)) != (expected_value)) {
+ if (old_ != tmp_) {
+ loop = adev->usec_timeout;
+ old_ = tmp_;
+ } else
+ udelay(1);
+ tmp_ = RREG32(reg_addr);
+ loop--;
+ if (!loop) {
+ dev_warn(
+ adev->dev,
+ "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
+ inst, reg_name, (uint32_t)expected_value,
+ (uint32_t)(tmp_ & (mask)));
+ ret = -ETIMEDOUT;
+ break;
+ }
+ }
+ return ret;
+}
+
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
+{
+ ssize_t size = 0;
+
+ if (!ring || !ring->adev)
+ return size;
+
+ if (amdgpu_device_should_recover_gpu(ring->adev))
+ size |= AMDGPU_RESET_TYPE_FULL;
+
+ if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
+ !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
+ size |= AMDGPU_RESET_TYPE_SOFT_RESET;
+
+ return size;
+}
+
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
+{
+ ssize_t size = 0;
+
+ if (supported_reset == 0) {
+ size += sysfs_emit_at(buf, size, "unsupported");
+ size += sysfs_emit_at(buf, size, "\n");
+ return size;
+
+ }
+
+ if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
+ size += sysfs_emit_at(buf, size, "soft ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
+ size += sysfs_emit_at(buf, size, "queue ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
+ size += sysfs_emit_at(buf, size, "pipe ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_FULL)
+ size += sysfs_emit_at(buf, size, "full ");
+
+ size += sysfs_emit_at(buf, size, "\n");
+ return size;
+}
+
+void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst,
+ uint64_t uid)
+{
+ if (!uid_info)
+ return;
+
+ if (type >= AMDGPU_UID_TYPE_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
+ type);
+ return;
+ }
+
+ if (inst >= AMDGPU_UID_INST_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
+ inst);
+ return;
+ }
+
+ if (uid_info->uid[type][inst] != 0) {
+ dev_warn_once(
+ uid_info->adev->dev,
+ "Overwriting existing UID %llu for type %d instance %d\n",
+ uid_info->uid[type][inst], type, inst);
+ }
+
+ uid_info->uid[type][inst] = uid;
+}
+
+u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst)
+{
+ if (!uid_info)
+ return 0;
+
+ if (type >= AMDGPU_UID_TYPE_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
+ type);
+ return 0;
+ }
+
+ if (inst >= AMDGPU_UID_INST_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
+ inst);
+ return 0;
+ }
+
+ return uid_info->uid[type][inst];
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
index 52488bb45112..eb605e79ae0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -33,6 +33,7 @@ struct amdgpu_df_hash_status {
struct amdgpu_df_funcs {
void (*sw_init)(struct amdgpu_device *adev);
void (*sw_fini)(struct amdgpu_device *adev);
+ void (*hw_init)(struct amdgpu_device *adev);
void (*enable_broadcast_mode)(struct amdgpu_device *adev,
bool enable);
u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
@@ -40,7 +41,7 @@ struct amdgpu_df_funcs {
void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
bool enable);
void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
bool enable);
int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
@@ -52,6 +53,7 @@ struct amdgpu_df_funcs {
uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
uint32_t ficadl_val, uint32_t ficadh_val);
+ bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
};
struct amdgpu_df {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index b2dbcb4df020..fa2a22dfa048 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -21,16 +21,115 @@
*
*/
+#include <linux/firmware.h>
+
#include "amdgpu.h"
#include "amdgpu_discovery.h"
#include "soc15_hw_ip.h"
#include "discovery.h"
+#include "amdgpu_ras.h"
+
+#include "soc15.h"
+#include "gfx_v9_0.h"
+#include "gfx_v9_4_3.h"
+#include "gmc_v9_0.h"
+#include "df_v1_7.h"
+#include "df_v3_6.h"
+#include "df_v4_3.h"
+#include "df_v4_6_2.h"
+#include "df_v4_15.h"
+#include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
+#include "nbio_v7_9.h"
+#include "nbio_v7_11.h"
+#include "hdp_v4_0.h"
+#include "vega10_ih.h"
+#include "vega20_ih.h"
+#include "sdma_v4_0.h"
+#include "sdma_v4_4_2.h"
+#include "uvd_v7_0.h"
+#include "vce_v4_0.h"
+#include "vcn_v1_0.h"
+#include "vcn_v2_5.h"
+#include "jpeg_v2_5.h"
+#include "smuio_v9_0.h"
+#include "gmc_v10_0.h"
+#include "gmc_v11_0.h"
+#include "gmc_v12_0.h"
+#include "gfxhub_v2_0.h"
+#include "mmhub_v2_0.h"
+#include "nbio_v2_3.h"
+#include "nbio_v4_3.h"
+#include "nbio_v7_2.h"
+#include "nbio_v7_7.h"
+#include "nbif_v6_3_1.h"
+#include "hdp_v5_0.h"
+#include "hdp_v5_2.h"
+#include "hdp_v6_0.h"
+#include "hdp_v7_0.h"
+#include "nv.h"
+#include "soc21.h"
+#include "soc24.h"
+#include "navi10_ih.h"
+#include "ih_v6_0.h"
+#include "ih_v6_1.h"
+#include "ih_v7_0.h"
+#include "gfx_v10_0.h"
+#include "gfx_v11_0.h"
+#include "gfx_v12_0.h"
+#include "sdma_v5_0.h"
+#include "sdma_v5_2.h"
+#include "sdma_v6_0.h"
+#include "sdma_v7_0.h"
+#include "lsdma_v6_0.h"
+#include "lsdma_v7_0.h"
+#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
+#include "vcn_v3_0.h"
+#include "jpeg_v3_0.h"
+#include "vcn_v4_0.h"
+#include "jpeg_v4_0.h"
+#include "vcn_v4_0_3.h"
+#include "jpeg_v4_0_3.h"
+#include "vcn_v4_0_5.h"
+#include "jpeg_v4_0_5.h"
+#include "amdgpu_vkms.h"
+#include "mes_v11_0.h"
+#include "mes_v12_0.h"
+#include "smuio_v11_0.h"
+#include "smuio_v11_0_6.h"
+#include "smuio_v13_0.h"
+#include "smuio_v13_0_3.h"
+#include "smuio_v13_0_6.h"
+#include "smuio_v14_0_2.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+#include "jpeg_v5_0_0.h"
+#include "jpeg_v5_0_1.h"
+#include "amdgpu_ras_mgr.h"
+
+#include "amdgpu_vpe.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
+MODULE_FIRMWARE("amdgpu/ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
+
+#define mmIP_DISCOVERY_VERSION 0x16A00
#define mmRCC_CONFIG_MEMSIZE 0xde3
+#define mmMP0_SMN_C2PMSG_33 0x16061
#define mmMM_INDEX 0x0
#define mmMM_INDEX_HI 0x6
#define mmMM_DATA 0x1
-#define HW_ID_MAX 300
static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID] = "MP1",
@@ -66,6 +165,9 @@ static const char *hw_id_names[HW_ID_MAX] = {
[HDP_HWID] = "HDP",
[SDMA0_HWID] = "SDMA0",
[SDMA1_HWID] = "SDMA1",
+ [SDMA2_HWID] = "SDMA2",
+ [SDMA3_HWID] = "SDMA3",
+ [LSDMA_HWID] = "LSDMA",
[ISP_HWID] = "ISP",
[DBGU_IO_HWID] = "DBGU_IO",
[DF_HWID] = "DF",
@@ -106,6 +208,7 @@ static const char *hw_id_names[HW_ID_MAX] = {
[XGMI_HWID] = "XGMI",
[XGBE_HWID] = "XGBE",
[MP0_HWID] = "MP0",
+ [VPE_HWID] = "VPE",
};
static int hw_id_map[MAX_HWIP] = {
@@ -113,6 +216,9 @@ static int hw_id_map[MAX_HWIP] = {
[HDP_HWIP] = HDP_HWID,
[SDMA0_HWIP] = SDMA0_HWID,
[SDMA1_HWIP] = SDMA1_HWID,
+ [SDMA2_HWIP] = SDMA2_HWID,
+ [SDMA3_HWIP] = SDMA3_HWID,
+ [LSDMA_HWIP] = LSDMA_HWID,
[MMHUB_HWIP] = MMHUB_HWID,
[ATHUB_HWIP] = ATHUB_HWID,
[NBIO_HWIP] = NBIF_HWID,
@@ -129,15 +235,126 @@ static int hw_id_map[MAX_HWIP] = {
[THM_HWIP] = THM_HWID,
[CLK_HWIP] = CLKA_HWID,
[UMC_HWIP] = UMC_HWID,
+ [XGMI_HWIP] = XGMI_HWID,
+ [DCI_HWIP] = DCI_HWID,
+ [PCIE_HWIP] = PCIE_HWID,
+ [VPE_HWIP] = VPE_HWID,
+ [ISP_HWIP] = ISP_HWID,
};
-static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
+static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
+{
+ u64 tmr_offset, tmr_size, pos;
+ void *discv_regn;
+ int ret;
+
+ ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
+ if (ret)
+ return ret;
+
+ pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
+
+ /* This region is read-only and reserved from system use */
+ discv_regn = memremap(pos, adev->discovery.size, MEMREMAP_WC);
+ if (discv_regn) {
+ memcpy(binary, discv_regn, adev->discovery.size);
+ memunmap(discv_regn);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+#define IP_DISCOVERY_V2 2
+#define IP_DISCOVERY_V4 4
+
+static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
+ uint8_t *binary)
+{
+ bool sz_valid = true;
+ uint64_t vram_size;
+ int i, ret = 0;
+ u32 msg;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* It can take up to two second for IFWI init to complete on some dGPUs,
+ * but generally it should be in the 60-100ms range. Normally this starts
+ * as soon as the device gets power so by the time the OS loads this has long
+ * completed. However, when a card is hotplugged via e.g., USB4, we need to
+ * wait for this to complete. Once the C2PMSG is updated, we can
+ * continue.
+ */
+
+ for (i = 0; i < 2000; i++) {
+ msg = RREG32(mmMP0_SMN_C2PMSG_33);
+ if (msg & 0x80000000)
+ break;
+ msleep(1);
+ }
+ }
+
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ sz_valid = false;
+ else
+ vram_size <<= 20;
+
+ /*
+ * If in VRAM, discovery TMR is marked for reservation. If it is in system mem,
+ * then it is not required to be reserved.
+ */
+ if (sz_valid) {
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ /* For SRIOV VFs with dynamic critical region enabled,
+ * we will get the IPD binary via below call.
+ * If dynamic critical is disabled, fall through to normal seq.
+ */
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_IPD_TABLE_ID, binary,
+ &adev->discovery.size)) {
+ dev_err(adev->dev,
+ "failed to read discovery info from dynamic critical region.");
+ ret = -EINVAL;
+ goto exit;
+ }
+ } else {
+ uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->discovery.size, false);
+ adev->discovery.reserve_tmr = true;
+ }
+ } else {
+ ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
+ }
+
+ if (ret)
+ dev_err(adev->dev,
+ "failed to read discovery info from memory, vram size read: %llx",
+ vram_size);
+exit:
+ return ret;
+}
+
+static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
+ uint8_t *binary,
+ const char *fw_name)
{
- uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
- uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+ const struct firmware *fw;
+ int r;
+
+ r = firmware_request_nowarn(&fw, fw_name, adev->dev);
+ if (r) {
+ if (amdgpu_discovery == 2)
+ dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name);
+ else
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name);
+ return r;
+ }
+
+ memcpy((u8 *)binary, (u8 *)fw->data, fw->size);
+ release_firmware(fw);
- amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
- adev->mman.discovery_tmr_size, false);
return 0;
}
@@ -158,44 +375,152 @@ static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size
return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
}
+static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary)
+{
+ struct binary_header *bhdr;
+ bhdr = (struct binary_header *)binary;
+
+ return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE);
+}
+
+static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
+{
+ /*
+ * So far, apply this quirk only on those Navy Flounder boards which
+ * have a bad harvest table of VCN config.
+ */
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 1) == IP_VERSION(3, 0, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 2))) {
+ switch (adev->pdev->revision) {
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC5:
+ case 0xC7:
+ case 0xCF:
+ case 0xDF:
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
+ struct binary_header *bhdr)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct table_info *info;
+ uint16_t checksum;
+ uint16_t offset;
+
+ info = &bhdr->table_list[NPS_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ struct nps_info_header *nhdr =
+ (struct nps_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) {
+ dev_dbg(adev->dev, "invalid ip discovery nps info table id\n");
+ return -EINVAL;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le32_to_cpu(nhdr->size_bytes),
+ checksum)) {
+ dev_dbg(adev->dev, "invalid nps info data table checksum\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
+{
+ if (amdgpu_discovery == 2) {
+ /* Assume there is valid discovery TMR in VRAM even if binary is sideloaded */
+ adev->discovery.reserve_tmr = true;
+ return "amdgpu/ip_discovery.bin";
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "amdgpu/vega10_ip_discovery.bin";
+ case CHIP_VEGA12:
+ return "amdgpu/vega12_ip_discovery.bin";
+ case CHIP_RAVEN:
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "amdgpu/raven2_ip_discovery.bin";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "amdgpu/picasso_ip_discovery.bin";
+ else
+ return "amdgpu/raven_ip_discovery.bin";
+ case CHIP_VEGA20:
+ return "amdgpu/vega20_ip_discovery.bin";
+ case CHIP_ARCTURUS:
+ return "amdgpu/arcturus_ip_discovery.bin";
+ case CHIP_ALDEBARAN:
+ return "amdgpu/aldebaran_ip_discovery.bin";
+ default:
+ return NULL;
+ }
+}
+
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
- struct ip_discovery_header *ihdr;
- struct gpu_info_header *ghdr;
+ uint8_t *discovery_bin;
+ const char *fw_name;
uint16_t offset;
uint16_t size;
uint16_t checksum;
int r;
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
- adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
- if (!adev->mman.discovery_bin)
+ adev->discovery.bin = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
+ if (!adev->discovery.bin)
return -ENOMEM;
+ adev->discovery.size = DISCOVERY_TMR_SIZE;
+ adev->discovery.debugfs_blob.data = adev->discovery.bin;
+ adev->discovery.debugfs_blob.size = adev->discovery.size;
- r = amdgpu_discovery_read_binary(adev, adev->mman.discovery_bin);
- if (r) {
- DRM_ERROR("failed to read ip discovery binary\n");
- goto out;
+ discovery_bin = adev->discovery.bin;
+ /* Read from file if it is the preferred option */
+ fw_name = amdgpu_discovery_get_fw_name(adev);
+ if (fw_name != NULL) {
+ drm_dbg(&adev->ddev, "use ip discovery information from file");
+ r = amdgpu_discovery_read_binary_from_file(adev, discovery_bin,
+ fw_name);
+ if (r)
+ goto out;
+ } else {
+ drm_dbg(&adev->ddev, "use ip discovery information from memory");
+ r = amdgpu_discovery_read_binary_from_mem(adev, discovery_bin);
+ if (r)
+ goto out;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
-
- if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
- DRM_ERROR("invalid ip discovery binary signature\n");
+ /* check the ip discovery binary signature */
+ if (!amdgpu_discovery_verify_binary_signature(discovery_bin)) {
+ dev_err(adev->dev,
+ "get invalid ip discovery binary signature\n");
r = -EINVAL;
goto out;
}
+ bhdr = (struct binary_header *)discovery_bin;
+
offset = offsetof(struct binary_header, binary_checksum) +
sizeof(bhdr->binary_checksum);
- size = bhdr->binary_size - offset;
- checksum = bhdr->binary_checksum;
+ size = le16_to_cpu(bhdr->binary_size) - offset;
+ checksum = le16_to_cpu(bhdr->binary_checksum);
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- size, checksum)) {
- DRM_ERROR("invalid ip discovery binary checksum\n");
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, size,
+ checksum)) {
+ dev_err(adev->dev, "invalid ip discovery binary checksum\n");
r = -EINVAL;
goto out;
}
@@ -203,79 +528,880 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
info = &bhdr->table_list[IP_DISCOVERY];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
- if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
- DRM_ERROR("invalid ip discovery data table signature\n");
- r = -EINVAL;
- goto out;
- }
+ if (offset) {
+ struct ip_discovery_header *ihdr =
+ (struct ip_discovery_header *)(discovery_bin + offset);
+ if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery data table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- ihdr->size, checksum)) {
- DRM_ERROR("invalid ip discovery data table checksum\n");
- r = -EINVAL;
- goto out;
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le16_to_cpu(ihdr->size),
+ checksum)) {
+ dev_err(adev->dev, "invalid ip discovery data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
}
info = &bhdr->table_list[GC];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
- ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- ghdr->size, checksum)) {
- DRM_ERROR("invalid gc data table checksum\n");
- r = -EINVAL;
- goto out;
+ if (offset) {
+ struct gpu_info_header *ghdr =
+ (struct gpu_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery gc table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le32_to_cpu(ghdr->size),
+ checksum)) {
+ dev_err(adev->dev, "invalid gc data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[HARVEST_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct harvest_info_header *hhdr =
+ (struct harvest_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ sizeof(struct harvest_table), checksum)) {
+ dev_err(adev->dev, "invalid harvest data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[VCN_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct vcn_info_header *vhdr =
+ (struct vcn_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery vcn table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(vhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid vcn data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[MALL_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (0 && offset) {
+ struct mall_info_header *mhdr =
+ (struct mall_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery mall table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(mhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid mall data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
}
return 0;
out:
- kfree(adev->mman.discovery_bin);
- adev->mman.discovery_bin = NULL;
-
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
+ if ((amdgpu_discovery != 2) &&
+ (RREG32(mmIP_DISCOVERY_VERSION) == 4))
+ amdgpu_ras_query_boot_status(adev, 4);
return r;
}
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
+
void amdgpu_discovery_fini(struct amdgpu_device *adev)
{
- kfree(adev->mman.discovery_bin);
- adev->mman.discovery_bin = NULL;
+ amdgpu_discovery_sysfs_fini(adev);
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
+}
+
+static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
+ uint8_t instance, uint16_t hw_id)
+{
+ if (instance >= HWIP_MAX_INSTANCE) {
+ dev_err(adev->dev,
+ "Unexpected instance_number (%d) from ip discovery blob\n",
+ instance);
+ return -EINVAL;
+ }
+ if (hw_id >= HW_ID_MAX) {
+ dev_err(adev->dev,
+ "Unexpected hw_id (%d) from ip discovery blob\n",
+ hw_id);
+ return -EINVAL;
+ }
+
+ return 0;
}
-int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
+ uint32_t *vcn_harvest_count)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
struct ip *ip;
+ uint16_t die_offset, ip_offset, num_dies, num_ips;
+ uint16_t hw_id;
+ uint8_t inst;
+ int i, j;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ /* scan harvest bit of all IP data structures */
+ for (i = 0; i < num_dies; i++) {
+ die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ for (j = 0; j < num_ips; j++) {
+ ip = (struct ip *)(discovery_bin + ip_offset);
+ inst = ip->number_instance;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
+ goto next_ip;
+
+ if (ip->harvest == 1) {
+ switch (hw_id) {
+ case VCN_HWID:
+ (*vcn_harvest_count)++;
+ if (inst == 0) {
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ } else {
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ }
+ break;
+ case DMU_HWID:
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ break;
+ default:
+ break;
+ }
+ }
+next_ip:
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
+ }
+ }
+}
+
+static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
+ uint32_t *vcn_harvest_count,
+ uint32_t *umc_harvest_count)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ struct harvest_table *harvest_info;
+ u16 offset;
+ int i;
+ uint32_t umc_harvest_config = 0;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
+
+ if (!offset) {
+ dev_err(adev->dev, "invalid harvest table offset\n");
+ return;
+ }
+
+ harvest_info = (struct harvest_table *)(discovery_bin + offset);
+
+ for (i = 0; i < 32; i++) {
+ if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
+ break;
+
+ switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
+ case VCN_HWID:
+ (*vcn_harvest_count)++;
+ adev->vcn.harvest_config |=
+ (1 << harvest_info->list[i].number_instance);
+ adev->jpeg.harvest_config |=
+ (1 << harvest_info->list[i].number_instance);
+
+ adev->vcn.inst_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ adev->jpeg.inst_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+ case DMU_HWID:
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ break;
+ case UMC_HWID:
+ umc_harvest_config |=
+ 1 << (le16_to_cpu(harvest_info->list[i].number_instance));
+ (*umc_harvest_count)++;
+ break;
+ case GC_HWID:
+ adev->gfx.xcc_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+ case SDMA0_HWID:
+ adev->sdma.sdma_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+#if defined(CONFIG_DRM_AMD_ISP)
+ case ISP_HWID:
+ adev->isp.harvest_config |=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+
+ adev->umc.active_mask = ((1 << adev->umc.node_inst_num) - 1) &
+ ~umc_harvest_config;
+}
+
+/* ================================================== */
+
+struct ip_hw_instance {
+ struct kobject kobj; /* ip_discovery/die/#die/#hw_id/#instance/<attrs...> */
+
+ int hw_id;
+ u8 num_instance;
+ u8 major, minor, revision;
+ u8 harvest;
+
+ int num_base_addresses;
+ u32 base_addr[] __counted_by(num_base_addresses);
+};
+
+struct ip_hw_id {
+ struct kset hw_id_kset; /* ip_discovery/die/#die/#hw_id/, contains ip_hw_instance */
+ int hw_id;
+};
+
+struct ip_die_entry {
+ struct kset ip_kset; /* ip_discovery/die/#die/, contains ip_hw_id */
+ u16 num_ips;
+};
+
+/* -------------------------------------------------- */
+
+struct ip_hw_instance_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct ip_hw_instance *ip_hw_instance, char *buf);
+};
+
+static ssize_t hw_id_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->hw_id);
+}
+
+static ssize_t num_instance_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->num_instance);
+}
+
+static ssize_t major_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->major);
+}
+
+static ssize_t minor_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->minor);
+}
+
+static ssize_t revision_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->revision);
+}
+
+static ssize_t harvest_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "0x%01X\n", ip_hw_instance->harvest);
+}
+
+static ssize_t num_base_addresses_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->num_base_addresses);
+}
+
+static ssize_t base_addr_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ ssize_t res, at;
+ int ii;
+
+ for (res = at = ii = 0; ii < ip_hw_instance->num_base_addresses; ii++) {
+ /* Here we satisfy the condition that, at + size <= PAGE_SIZE.
+ */
+ if (at + 12 > PAGE_SIZE)
+ break;
+ res = sysfs_emit_at(buf, at, "0x%08X\n",
+ ip_hw_instance->base_addr[ii]);
+ if (res <= 0)
+ break;
+ at += res;
+ }
+
+ return res < 0 ? res : at;
+}
+
+static struct ip_hw_instance_attr ip_hw_attr[] = {
+ __ATTR_RO(hw_id),
+ __ATTR_RO(num_instance),
+ __ATTR_RO(major),
+ __ATTR_RO(minor),
+ __ATTR_RO(revision),
+ __ATTR_RO(harvest),
+ __ATTR_RO(num_base_addresses),
+ __ATTR_RO(base_addr),
+};
+
+static struct attribute *ip_hw_instance_attrs[ARRAY_SIZE(ip_hw_attr) + 1];
+ATTRIBUTE_GROUPS(ip_hw_instance);
+
+#define to_ip_hw_instance(x) container_of(x, struct ip_hw_instance, kobj)
+#define to_ip_hw_instance_attr(x) container_of(x, struct ip_hw_instance_attr, attr)
+
+static ssize_t ip_hw_instance_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
+ struct ip_hw_instance_attr *ip_hw_attr = to_ip_hw_instance_attr(attr);
+
+ if (!ip_hw_attr->show)
+ return -EIO;
+
+ return ip_hw_attr->show(ip_hw_instance, buf);
+}
+
+static const struct sysfs_ops ip_hw_instance_sysfs_ops = {
+ .show = ip_hw_instance_attr_show,
+};
+
+static void ip_hw_instance_release(struct kobject *kobj)
+{
+ struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
+
+ kfree(ip_hw_instance);
+}
+
+static const struct kobj_type ip_hw_instance_ktype = {
+ .release = ip_hw_instance_release,
+ .sysfs_ops = &ip_hw_instance_sysfs_ops,
+ .default_groups = ip_hw_instance_groups,
+};
+
+/* -------------------------------------------------- */
+
+#define to_ip_hw_id(x) container_of(to_kset(x), struct ip_hw_id, hw_id_kset)
+
+static void ip_hw_id_release(struct kobject *kobj)
+{
+ struct ip_hw_id *ip_hw_id = to_ip_hw_id(kobj);
+
+ if (!list_empty(&ip_hw_id->hw_id_kset.list))
+ DRM_ERROR("ip_hw_id->hw_id_kset is not empty");
+ kfree(ip_hw_id);
+}
+
+static const struct kobj_type ip_hw_id_ktype = {
+ .release = ip_hw_id_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/* -------------------------------------------------- */
+
+static void die_kobj_release(struct kobject *kobj);
+static void ip_disc_release(struct kobject *kobj);
+
+struct ip_die_entry_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ip_die_entry *ip_die_entry, char *buf);
+};
+
+#define to_ip_die_entry_attr(x) container_of(x, struct ip_die_entry_attribute, attr)
+
+static ssize_t num_ips_show(struct ip_die_entry *ip_die_entry, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_die_entry->num_ips);
+}
+
+/* If there are more ip_die_entry attrs, other than the number of IPs,
+ * we can make this intro an array of attrs, and then initialize
+ * ip_die_entry_attrs in a loop.
+ */
+static struct ip_die_entry_attribute num_ips_attr =
+ __ATTR_RO(num_ips);
+
+static struct attribute *ip_die_entry_attrs[] = {
+ &num_ips_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(ip_die_entry); /* ip_die_entry_groups */
+
+#define to_ip_die_entry(x) container_of(to_kset(x), struct ip_die_entry, ip_kset)
+
+static ssize_t ip_die_entry_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ip_die_entry_attribute *ip_die_entry_attr = to_ip_die_entry_attr(attr);
+ struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj);
+
+ if (!ip_die_entry_attr->show)
+ return -EIO;
+
+ return ip_die_entry_attr->show(ip_die_entry, buf);
+}
+
+static void ip_die_entry_release(struct kobject *kobj)
+{
+ struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj);
+
+ if (!list_empty(&ip_die_entry->ip_kset.list))
+ DRM_ERROR("ip_die_entry->ip_kset is not empty");
+ kfree(ip_die_entry);
+}
+
+static const struct sysfs_ops ip_die_entry_sysfs_ops = {
+ .show = ip_die_entry_attr_show,
+};
+
+static const struct kobj_type ip_die_entry_ktype = {
+ .release = ip_die_entry_release,
+ .sysfs_ops = &ip_die_entry_sysfs_ops,
+ .default_groups = ip_die_entry_groups,
+};
+
+static const struct kobj_type die_kobj_ktype = {
+ .release = die_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static const struct kobj_type ip_discovery_ktype = {
+ .release = ip_disc_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+struct ip_discovery_top {
+ struct kobject kobj; /* ip_discovery/ */
+ struct kset die_kset; /* ip_discovery/die/, contains ip_die_entry */
+ struct amdgpu_device *adev;
+};
+
+static void die_kobj_release(struct kobject *kobj)
+{
+ struct ip_discovery_top *ip_top = container_of(to_kset(kobj),
+ struct ip_discovery_top,
+ die_kset);
+ if (!list_empty(&ip_top->die_kset.list))
+ DRM_ERROR("ip_top->die_kset is not empty");
+}
+
+static void ip_disc_release(struct kobject *kobj)
+{
+ struct ip_discovery_top *ip_top = container_of(kobj, struct ip_discovery_top,
+ kobj);
+ struct amdgpu_device *adev = ip_top->adev;
+
+ kfree(ip_top);
+ adev->discovery.ip_top = NULL;
+}
+
+static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
+ uint16_t hw_id, uint8_t inst)
+{
+ uint8_t harvest = 0;
+
+ /* Until a uniform way is figured, get mask based on hwid */
+ switch (hw_id) {
+ case VCN_HWID:
+ /* VCN vs UVD+VCE */
+ if (!amdgpu_ip_version(adev, VCE_HWIP, 0))
+ harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
+ break;
+ case DMU_HWID:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
+ harvest = 0x1;
+ break;
+ case UMC_HWID:
+ /* TODO: It needs another parsing; for now, ignore.*/
+ break;
+ case GC_HWID:
+ harvest = ((1 << inst) & adev->gfx.xcc_mask) == 0;
+ break;
+ case SDMA0_HWID:
+ harvest = ((1 << inst) & adev->sdma.sdma_mask) == 0;
+ break;
+ default:
+ break;
+ }
+
+ return harvest;
+}
+
+static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
+ struct ip_die_entry *ip_die_entry,
+ const size_t _ip_offset, const int num_ips,
+ bool reg_base_64)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ int ii, jj, kk, res;
+ uint16_t hw_id;
+ uint8_t inst;
+
+ DRM_DEBUG("num_ips:%d", num_ips);
+
+ /* Find all IPs of a given HW ID, and add their instance to
+ * #die/#hw_id/#instance/<attributes>
+ */
+ for (ii = 0; ii < HW_ID_MAX; ii++) {
+ struct ip_hw_id *ip_hw_id = NULL;
+ size_t ip_offset = _ip_offset;
+
+ for (jj = 0; jj < num_ips; jj++) {
+ struct ip_v4 *ip;
+ struct ip_hw_instance *ip_hw_instance;
+
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id) ||
+ hw_id != ii)
+ goto next_ip;
+
+ DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset);
+
+ /* We have a hw_id match; register the hw
+ * block if not yet registered.
+ */
+ if (!ip_hw_id) {
+ ip_hw_id = kzalloc(sizeof(*ip_hw_id), GFP_KERNEL);
+ if (!ip_hw_id)
+ return -ENOMEM;
+ ip_hw_id->hw_id = ii;
+
+ kobject_set_name(&ip_hw_id->hw_id_kset.kobj, "%d", ii);
+ ip_hw_id->hw_id_kset.kobj.kset = &ip_die_entry->ip_kset;
+ ip_hw_id->hw_id_kset.kobj.ktype = &ip_hw_id_ktype;
+ res = kset_register(&ip_hw_id->hw_id_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register ip_hw_id kset");
+ kfree(ip_hw_id);
+ return res;
+ }
+ if (hw_id_names[ii]) {
+ res = sysfs_create_link(&ip_die_entry->ip_kset.kobj,
+ &ip_hw_id->hw_id_kset.kobj,
+ hw_id_names[ii]);
+ if (res) {
+ DRM_ERROR("Couldn't create IP link %s in IP Die:%s\n",
+ hw_id_names[ii],
+ kobject_name(&ip_die_entry->ip_kset.kobj));
+ }
+ }
+ }
+
+ /* Now register its instance.
+ */
+ ip_hw_instance = kzalloc(struct_size(ip_hw_instance,
+ base_addr,
+ ip->num_base_address),
+ GFP_KERNEL);
+ if (!ip_hw_instance) {
+ DRM_ERROR("no memory for ip_hw_instance");
+ return -ENOMEM;
+ }
+ ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */
+ ip_hw_instance->num_instance = ip->instance_number;
+ ip_hw_instance->major = ip->major;
+ ip_hw_instance->minor = ip->minor;
+ ip_hw_instance->revision = ip->revision;
+ ip_hw_instance->harvest =
+ amdgpu_discovery_get_harvest_info(
+ adev, ip_hw_instance->hw_id,
+ ip_hw_instance->num_instance);
+ ip_hw_instance->num_base_addresses = ip->num_base_address;
+
+ for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) {
+ if (reg_base_64)
+ ip_hw_instance->base_addr[kk] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF;
+ else
+ ip_hw_instance->base_addr[kk] = ip->base_address[kk];
+ }
+
+ kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
+ ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
+ res = kobject_add(&ip_hw_instance->kobj, NULL,
+ "%d", ip_hw_instance->num_instance);
+next_ip:
+ if (reg_base_64)
+ ip_offset += struct_size(ip, base_address_64,
+ ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
+{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct kset *die_kset = &ip_top->die_kset;
+ u16 num_dies, die_offset, num_ips;
+ size_t ip_offset;
+ int ii, res;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ DRM_DEBUG("number of dies: %d\n", num_dies);
+
+ for (ii = 0; ii < num_dies; ii++) {
+ struct ip_die_entry *ip_die_entry;
+
+ die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ /* Add the die to the kset.
+ *
+ * dhdr->die_id == ii, which was checked in
+ * amdgpu_discovery_reg_base_init().
+ */
+
+ ip_die_entry = kzalloc(sizeof(*ip_die_entry), GFP_KERNEL);
+ if (!ip_die_entry)
+ return -ENOMEM;
+
+ ip_die_entry->num_ips = num_ips;
+
+ kobject_set_name(&ip_die_entry->ip_kset.kobj, "%d", le16_to_cpu(dhdr->die_id));
+ ip_die_entry->ip_kset.kobj.kset = die_kset;
+ ip_die_entry->ip_kset.kobj.ktype = &ip_die_entry_ktype;
+ res = kset_register(&ip_die_entry->ip_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register ip_die_entry kset");
+ kfree(ip_die_entry);
+ return res;
+ }
+
+ amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips, !!ihdr->base_addr_64_bit);
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct ip_discovery_top *ip_top;
+ struct kset *die_kset;
+ int res, ii;
+
+ if (!discovery_bin)
+ return -EINVAL;
+
+ ip_top = kzalloc(sizeof(*ip_top), GFP_KERNEL);
+ if (!ip_top)
+ return -ENOMEM;
+
+ ip_top->adev = adev;
+ adev->discovery.ip_top = ip_top;
+ res = kobject_init_and_add(&ip_top->kobj, &ip_discovery_ktype,
+ &adev->dev->kobj, "ip_discovery");
+ if (res) {
+ DRM_ERROR("Couldn't init and add ip_discovery/");
+ goto Err;
+ }
+
+ die_kset = &ip_top->die_kset;
+ kobject_set_name(&die_kset->kobj, "%s", "die");
+ die_kset->kobj.parent = &ip_top->kobj;
+ die_kset->kobj.ktype = &die_kobj_ktype;
+ res = kset_register(&ip_top->die_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register die_kset");
+ goto Err;
+ }
+
+ for (ii = 0; ii < ARRAY_SIZE(ip_hw_attr); ii++)
+ ip_hw_instance_attrs[ii] = &ip_hw_attr[ii].attr;
+ ip_hw_instance_attrs[ii] = NULL;
+
+ res = amdgpu_discovery_sysfs_recurse(adev);
+
+ return res;
+Err:
+ kobject_put(&ip_top->kobj);
+ return res;
+}
+
+/* -------------------------------------------------- */
+
+#define list_to_kobj(el) container_of(el, struct kobject, entry)
+
+static void amdgpu_discovery_sysfs_ip_hw_free(struct ip_hw_id *ip_hw_id)
+{
+ struct list_head *el, *tmp;
+ struct kset *hw_id_kset;
+
+ hw_id_kset = &ip_hw_id->hw_id_kset;
+ spin_lock(&hw_id_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &hw_id_kset->list) {
+ list_del_init(el);
+ spin_unlock(&hw_id_kset->list_lock);
+ /* kobject is embedded in ip_hw_instance */
+ kobject_put(list_to_kobj(el));
+ spin_lock(&hw_id_kset->list_lock);
+ }
+ spin_unlock(&hw_id_kset->list_lock);
+ kobject_put(&ip_hw_id->hw_id_kset.kobj);
+}
+
+static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry)
+{
+ struct list_head *el, *tmp;
+ struct kset *ip_kset;
+
+ ip_kset = &ip_die_entry->ip_kset;
+ spin_lock(&ip_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &ip_kset->list) {
+ list_del_init(el);
+ spin_unlock(&ip_kset->list_lock);
+ amdgpu_discovery_sysfs_ip_hw_free(to_ip_hw_id(list_to_kobj(el)));
+ spin_lock(&ip_kset->list_lock);
+ }
+ spin_unlock(&ip_kset->list_lock);
+ kobject_put(&ip_die_entry->ip_kset.kobj);
+}
+
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
+{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+ struct list_head *el, *tmp;
+ struct kset *die_kset;
+
+ die_kset = &ip_top->die_kset;
+ spin_lock(&die_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &die_kset->list) {
+ list_del_init(el);
+ spin_unlock(&die_kset->list_lock);
+ amdgpu_discovery_sysfs_die_free(to_ip_die_entry(list_to_kobj(el)));
+ spin_lock(&die_kset->list_lock);
+ }
+ spin_unlock(&die_kset->list_lock);
+ kobject_put(&ip_top->die_kset.kobj);
+ kobject_put(&ip_top->kobj);
+}
+
+/* ================================================== */
+
+static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+{
+ uint8_t num_base_address, subrev, variant;
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ uint8_t *discovery_bin;
+ struct ip_v4 *ip;
uint16_t die_offset;
uint16_t ip_offset;
uint16_t num_dies;
+ uint32_t wafl_ver;
uint16_t num_ips;
- uint8_t num_base_address;
+ uint16_t hw_id;
+ uint8_t inst;
int hw_ip;
int i, j, k;
int r;
r = amdgpu_discovery_init(adev);
- if (r) {
- DRM_ERROR("amdgpu_discovery_init failed\n");
+ if (r)
return r;
- }
-
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ discovery_bin = adev->discovery.bin;
+ wafl_ver = 0;
+ adev->gfx.xcc_mask = 0;
+ adev->sdma.sdma_mask = 0;
+ adev->vcn.inst_mask = 0;
+ adev->jpeg.inst_mask = 0;
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
@@ -289,122 +1415,1827 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
le16_to_cpu(dhdr->die_id), num_ips);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
+
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
+ goto next_ip;
+
num_base_address = ip->num_base_address;
DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
hw_id_names[le16_to_cpu(ip->hw_id)],
le16_to_cpu(ip->hw_id),
- ip->number_instance,
+ ip->instance_number,
ip->major, ip->minor,
ip->revision);
+ if (le16_to_cpu(ip->hw_id) == VCN_HWID) {
+ /* Bit [5:0]: original revision value
+ * Bit [7:6]: en/decode capability:
+ * 0b00 : VCN function normally
+ * 0b10 : encode is disabled
+ * 0b01 : decode is disabled
+ */
+ if (adev->vcn.num_vcn_inst <
+ AMDGPU_MAX_VCN_INSTANCES) {
+ adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config =
+ ip->revision & 0xc0;
+ adev->vcn.num_vcn_inst++;
+ adev->vcn.inst_mask |=
+ (1U << ip->instance_number);
+ adev->jpeg.inst_mask |=
+ (1U << ip->instance_number);
+ } else {
+ dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
+ adev->vcn.num_vcn_inst + 1,
+ AMDGPU_MAX_VCN_INSTANCES);
+ }
+ ip->revision &= ~0xc0;
+ }
+ if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
+ if (adev->sdma.num_instances <
+ AMDGPU_MAX_SDMA_INSTANCES) {
+ adev->sdma.num_instances++;
+ adev->sdma.sdma_mask |=
+ (1U << ip->instance_number);
+ } else {
+ dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
+ adev->sdma.num_instances + 1,
+ AMDGPU_MAX_SDMA_INSTANCES);
+ }
+ }
+
+ if (le16_to_cpu(ip->hw_id) == VPE_HWID) {
+ if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES)
+ adev->vpe.num_instances++;
+ else
+ dev_err(adev->dev, "Too many VPE instances: %d vs %d\n",
+ adev->vpe.num_instances + 1,
+ AMDGPU_MAX_VPE_INSTANCES);
+ }
+
+ if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
+ adev->gmc.num_umc++;
+ adev->umc.node_inst_num++;
+ }
+
+ if (le16_to_cpu(ip->hw_id) == GC_HWID)
+ adev->gfx.xcc_mask |=
+ (1U << ip->instance_number);
+
+ if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID)
+ wafl_ver = IP_VERSION_FULL(ip->major, ip->minor,
+ ip->revision, 0, 0);
+
for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
* so that we don't need to convert them when accessing adev->reg_offset.
*/
- ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
+ if (ihdr->base_addr_64_bit)
+ /* Truncate the 64bit base address from ip discovery
+ * and only store lower 32bit ip base in reg_offset[].
+ * Bits > 32 follows ASIC specific format, thus just
+ * discard them and handle it within specific ASIC.
+ * By this way reg_offset[] and related helpers can
+ * stay unchanged.
+ * The base address is in dwords, thus clear the
+ * highest 2 bits to store.
+ */
+ ip->base_address[k] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[k])) & 0x3FFFFFFF;
+ else
+ ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
}
for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
- if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
+ if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id) &&
+ hw_id_map[hw_ip] != 0) {
DRM_DEBUG("set register base offset for %s\n",
hw_id_names[le16_to_cpu(ip->hw_id)]);
- adev->reg_offset[hw_ip][ip->number_instance] =
+ adev->reg_offset[hw_ip][ip->instance_number] =
ip->base_address;
- }
+ /* Instance support is somewhat inconsistent.
+ * SDMA is a good example. Sienna cichlid has 4 total
+ * SDMA instances, each enumerated separately (HWIDs
+ * 42, 43, 68, 69). Arcturus has 8 total SDMA instances,
+ * but they are enumerated as multiple instances of the
+ * same HWIDs (4x HWID 42, 4x HWID 43). UMC is another
+ * example. On most chips there are multiple instances
+ * with the same HWID.
+ */
+
+ if (ihdr->version < 3) {
+ subrev = 0;
+ variant = 0;
+ } else {
+ subrev = ip->sub_revision;
+ variant = ip->variant;
+ }
+ adev->ip_versions[hw_ip]
+ [ip->instance_number] =
+ IP_VERSION_FULL(ip->major,
+ ip->minor,
+ ip->revision,
+ variant,
+ subrev);
+ }
}
- ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+next_ip:
+ if (ihdr->base_addr_64_bit)
+ ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
+ if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0])
+ adev->ip_versions[XGMI_HWIP][0] = wafl_ver;
+
return 0;
}
-int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
- int *major, int *minor, int *revision)
+static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
- struct binary_header *bhdr;
+ uint8_t *discovery_bin = adev->discovery.bin;
struct ip_discovery_header *ihdr;
- struct die_header *dhdr;
- struct ip *ip;
- uint16_t die_offset;
- uint16_t ip_offset;
- uint16_t num_dies;
- uint16_t num_ips;
- int i, j;
+ struct binary_header *bhdr;
+ int vcn_harvest_count = 0;
+ int umc_harvest_count = 0;
+ uint16_t offset, ihdr_ver;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset);
+ ihdr = (struct ip_discovery_header *)(discovery_bin + offset);
+ ihdr_ver = le16_to_cpu(ihdr->version);
+ /*
+ * Harvest table does not fit Navi1x and legacy GPUs,
+ * so read harvest bit per IP data structure to set
+ * harvest configuration.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) &&
+ ihdr_ver <= 2) {
+ if ((adev->pdev->device == 0x731E &&
+ (adev->pdev->revision == 0xC6 ||
+ adev->pdev->revision == 0xC7)) ||
+ (adev->pdev->device == 0x7340 &&
+ adev->pdev->revision == 0xC9) ||
+ (adev->pdev->device == 0x7360 &&
+ adev->pdev->revision == 0xC7))
+ amdgpu_discovery_read_harvest_bit_per_ip(adev,
+ &vcn_harvest_count);
+ } else {
+ amdgpu_discovery_read_from_harvest_table(adev,
+ &vcn_harvest_count,
+ &umc_harvest_count);
+ }
- if (!adev->mman.discovery_bin) {
+ amdgpu_discovery_harvest_config_quirk(adev);
+
+ if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+ }
+
+ if (umc_harvest_count < adev->gmc.num_umc) {
+ adev->gmc.num_umc -= umc_harvest_count;
+ }
+}
+
+union gc_info {
+ struct gc_info_v1_0 v1;
+ struct gc_info_v1_1 v1_1;
+ struct gc_info_v1_2 v1_2;
+ struct gc_info_v1_3 v1_3;
+ struct gc_info_v2_0 v2;
+ struct gc_info_v2_1 v2_1;
+};
+
+static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ union gc_info *gc_info;
+ u16 offset;
+
+ if (!discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
- num_dies = le16_to_cpu(ihdr->num_dies);
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[GC].offset);
- for (i = 0; i < num_dies; i++) {
- die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
- num_ips = le16_to_cpu(dhdr->num_ips);
- ip_offset = die_offset + sizeof(*dhdr);
+ if (!offset)
+ return 0;
- for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
-
- if (le16_to_cpu(ip->hw_id) == hw_id) {
- if (major)
- *major = ip->major;
- if (minor)
- *minor = ip->minor;
- if (revision)
- *revision = ip->revision;
- return 0;
- }
- ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+ gc_info = (union gc_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(gc_info->v1.header.version_major)) {
+ case 1:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
+ le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v1_1.gc_num_tcp_per_sa);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v1_1.gc_num_sdp_interface);
+ adev->gfx.config.gc_num_tcps = le32_to_cpu(gc_info->v1_1.gc_num_tcps);
}
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 2) {
+ adev->gfx.config.gc_num_tcp_per_wpg = le32_to_cpu(gc_info->v1_2.gc_num_tcp_per_wpg);
+ adev->gfx.config.gc_tcp_l1_size = le32_to_cpu(gc_info->v1_2.gc_tcp_l1_size);
+ adev->gfx.config.gc_num_sqc_per_wgp = le32_to_cpu(gc_info->v1_2.gc_num_sqc_per_wgp);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_data_cache_size_per_sqc);
+ adev->gfx.config.gc_gl1c_per_sa = le32_to_cpu(gc_info->v1_2.gc_gl1c_per_sa);
+ adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance);
+ adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu);
+ }
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) {
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size);
+ adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size);
+ adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size);
+ adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size);
+ }
+ break;
+ case 2:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+ if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
+ adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled GC info table %d.%d\n",
+ le16_to_cpu(gc_info->v1.header.version_major),
+ le16_to_cpu(gc_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union mall_info {
+ struct mall_info_v1_0 v1;
+ struct mall_info_v2_0 v2;
+};
+
+static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ union mall_info *mall_info;
+ u32 u, mall_size_per_umc, m_s_present, half_use;
+ u64 mall_size;
+ u16 offset;
+
+ if (!discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
}
- return -EINVAL;
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ mall_info = (union mall_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(mall_info->v1.header.version_major)) {
+ case 1:
+ mall_size = 0;
+ mall_size_per_umc = le32_to_cpu(mall_info->v1.mall_size_per_m);
+ m_s_present = le32_to_cpu(mall_info->v1.m_s_present);
+ half_use = le32_to_cpu(mall_info->v1.m_half_use);
+ for (u = 0; u < adev->gmc.num_umc; u++) {
+ if (m_s_present & (1 << u))
+ mall_size += mall_size_per_umc * 2;
+ else if (half_use & (1 << u))
+ mall_size += mall_size_per_umc / 2;
+ else
+ mall_size += mall_size_per_umc;
+ }
+ adev->gmc.mall_size = mall_size;
+ adev->gmc.m_half_use = half_use;
+ break;
+ case 2:
+ mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
+ adev->gmc.mall_size = (uint64_t)mall_size_per_umc * adev->gmc.num_umc;
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled MALL info table %d.%d\n",
+ le16_to_cpu(mall_info->v1.header.version_major),
+ le16_to_cpu(mall_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
}
-int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+union vcn_info {
+ struct vcn_info_v1_0 v1;
+};
+
+static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
- struct gc_info_v1_0 *gc_info;
+ union vcn_info *vcn_info;
+ u16 offset;
+ int v;
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[GC].offset));
-
- adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
- adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
- le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
- adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
- adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
- adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
- adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
- adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
- adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
- adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
- adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
- adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
- adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
- le32_to_cpu(gc_info->gc_num_sa_per_se);
- adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES
+ * but that may change in the future with new GPUs so keep this
+ * check for defensive purposes.
+ */
+ if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) {
+ dev_err(adev->dev, "invalid vcn instances\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ vcn_info = (union vcn_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
+ case 1:
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * so this won't overflow.
+ */
+ for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
+ adev->vcn.inst[v].vcn_codec_disable_mask =
+ le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled VCN info table %d.%d\n",
+ le16_to_cpu(vcn_info->v1.header.version_major),
+ le16_to_cpu(vcn_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union nps_info {
+ struct nps_info_v1_0 v1;
+};
+
+static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,
+ union nps_info *nps_data)
+{
+ uint64_t vram_size, pos, offset;
+ struct nps_info_header *nhdr;
+ struct binary_header bhdr;
+ uint16_t checksum;
+
+ vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
+ pos = vram_size - DISCOVERY_TMR_OFFSET;
+ amdgpu_device_vram_access(adev, pos, &bhdr, sizeof(bhdr), false);
+
+ offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset);
+ checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum);
+
+ amdgpu_device_vram_access(adev, (pos + offset), nps_data,
+ sizeof(*nps_data), false);
+
+ nhdr = (struct nps_info_header *)(nps_data);
+ if (!amdgpu_discovery_verify_checksum((uint8_t *)nps_data,
+ le32_to_cpu(nhdr->size_bytes),
+ checksum)) {
+ dev_err(adev->dev, "nps data refresh, checksum mismatch\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+ uint32_t *nps_type,
+ struct amdgpu_gmc_memrange **ranges,
+ int *range_cnt, bool refresh)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct amdgpu_gmc_memrange *mem_ranges;
+ struct binary_header *bhdr;
+ union nps_info *nps_info;
+ union nps_info nps_data;
+ u16 offset;
+ int i, r;
+
+ if (!nps_type || !range_cnt || !ranges)
+ return -EINVAL;
+
+ if (refresh) {
+ r = amdgpu_discovery_refresh_nps_info(adev, &nps_data);
+ if (r)
+ return r;
+ nps_info = &nps_data;
+ } else {
+ if (!discovery_bin) {
+ dev_err(adev->dev,
+ "fetch mem range failed, ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
+
+ if (!offset)
+ return -ENOENT;
+
+ /* If verification fails, return as if NPS table doesn't exist */
+ if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
+ return -ENOENT;
+
+ nps_info = (union nps_info *)(discovery_bin + offset);
+ }
+
+ switch (le16_to_cpu(nps_info->v1.header.version_major)) {
+ case 1:
+ mem_ranges = kvcalloc(nps_info->v1.count,
+ sizeof(*mem_ranges),
+ GFP_KERNEL);
+ if (!mem_ranges)
+ return -ENOMEM;
+ *nps_type = nps_info->v1.nps_type;
+ *range_cnt = nps_info->v1.count;
+ for (i = 0; i < *range_cnt; i++) {
+ mem_ranges[i].base_address =
+ nps_info->v1.instance_info[i].base_address;
+ mem_ranges[i].limit_address =
+ nps_info->v1.instance_info[i].limit_address;
+ mem_ranges[i].nid_mask = -1;
+ mem_ranges[i].flags = 0;
+ }
+ *ranges = mem_ranges;
+ break;
+ default:
+ dev_err(adev->dev, "Unhandled NPS info table %d.%d\n",
+ le16_to_cpu(nps_info->v1.header.version_major),
+ le16_to_cpu(nps_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
+{
+ /* what IP to use for this? */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &soc24_common_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add common ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
+{
+ /* use GC or MMHUB IP version */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &gmc_v12_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev, "Failed to add gmc ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 3, 0):
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ break;
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 1):
+ case IP_VERSION(4, 4, 0):
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 3):
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 1):
+ amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block);
+ break;
+ case IP_VERSION(6, 1, 0):
+ amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block);
+ break;
+ case IP_VERSION(7, 0, 0):
+ amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ break;
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 8):
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_8_ip_block);
+ break;
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
+ amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 4):
+ amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block);
+ break;
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 5):
+ amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add psp ip block(MP0_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, MP0_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_ARCTURUS)
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 8):
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
+ amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
+ break;
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 4):
+ case IP_VERSION(14, 0, 5):
+ amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add smu ip block(MP1_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, MP1_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#if defined(CONFIG_DRM_AMD_DC)
+static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev)
+{
+ amdgpu_device_set_sriov_virtual_display(adev);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+}
+#endif
+
+static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
+{
+ if (adev->enable_virtual_display) {
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+ return 0;
+ }
+
+ if (!amdgpu_device_has_dc_support(adev))
+ return 0;
+
+#if defined(CONFIG_DRM_AMD_DC)
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ case IP_VERSION(2, 0, 2):
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 3):
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 0, 1):
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 1, 0):
+ /* TODO: Fix IP version. DC code expects version 4.0.1 */
+ if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0))
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 0, 1);
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_discovery_set_sriov_display(adev);
+ else
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add dm ip block(DCE_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
+ return -EINVAL;
+ }
+ } else if (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ case IP_VERSION(12, 1, 0):
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_discovery_set_sriov_display(adev);
+ else
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add dm ip block(DCI_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, DCI_HWIP, 0));
+ return -EINVAL;
+ }
+ }
+#endif
+ return 0;
+}
+
+static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 2):
+ case IP_VERSION(4, 4, 0):
+ amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
+ break;
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
+ amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
+ break;
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 4):
+ case IP_VERSION(5, 2, 5):
+ case IP_VERSION(5, 2, 6):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 1):
+ case IP_VERSION(5, 2, 7):
+ amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_ras_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ amdgpu_device_ip_block_add(adev, &ras_v1_0_ip_block);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
+{
+ if (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 2, 0):
+ /* UVD is not supported on vega20 SR-IOV */
+ if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev)))
+ amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add uvd v7 ip block(UVD_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
+ return -EINVAL;
+ }
+ switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 1, 0):
+ /* VCE is not supported on vega20 SR-IOV */
+ if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev)))
+ amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add VCE v4 ip block(VCE_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, VCE_HWIP, 0));
+ return -EINVAL;
+ }
+ } else {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
+ break;
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ case IP_VERSION(2, 2, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
+ break;
+ case IP_VERSION(2, 0, 3):
+ break;
+ case IP_VERSION(2, 5, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block);
+ break;
+ case IP_VERSION(2, 6, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_6_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_6_ip_block);
+ break;
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 16):
+ case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 0, 2):
+ amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
+ break;
+ case IP_VERSION(3, 0, 33):
+ amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
+ break;
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 2):
+ case IP_VERSION(4, 0, 4):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
+ break;
+ case IP_VERSION(4, 0, 3):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block);
+ break;
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_5_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
+ break;
+ case IP_VERSION(5, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &mes_v12_0_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ if (amdgpu_uni_mes)
+ adev->enable_uni_mes = true;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ aqua_vanjaram_init_soc_config(adev);
+ break;
+ default:
+ break;
+ }
+}
+
+static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 3):
+ amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ if (amdgpu_umsch_mm & 0x1) {
+ amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block);
+ adev->enable_umsch_mm = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_isp_ip_blocks(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DRM_AMD_ISP)
+ switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ amdgpu_device_ip_block_add(adev, &isp_v4_1_0_ip_block);
+ break;
+ case IP_VERSION(4, 1, 1):
+ amdgpu_device_ip_block_add(adev, &isp_v4_1_1_ip_block);
+ break;
+ default:
+ break;
+ }
+#endif
+
+ return 0;
+}
+
+int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
+{
+ int r;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(6, 1, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 0, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 0, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
+ break;
+ case CHIP_VEGA12:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 5, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(6, 2, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(9, 0, 1);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
+ break;
+ case CHIP_RAVEN:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 1;
+ adev->sdma.sdma_mask = 1;
+ adev->vcn.num_vcn_inst = 1;
+ adev->gmc.num_umc = 2;
+ adev->gfx.xcc_mask = 1;
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 1);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 0, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(7, 5, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(10, 1, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 2);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 1);
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 1);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
+ } else {
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 0);
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 0);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
+ }
+ break;
+ case CHIP_VEGA20:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ vega20_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 8;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 1);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 2, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(7, 2, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
+ break;
+ case CHIP_ARCTURUS:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ arct_reg_base_init(adev);
+ adev->sdma.num_instances = 8;
+ adev->sdma.sdma_mask = 0xff;
+ adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 8;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 2, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][2] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][3] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][4] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][5] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][6] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 1);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 2);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 4);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 3);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 3);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 5, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
+ break;
+ case CHIP_ALDEBARAN:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ aldebaran_reg_base_init(adev);
+ adev->sdma.num_instances = 5;
+ adev->sdma.sdma_mask = 0x1f;
+ adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][1] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][2] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][3] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][4] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 2);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 4);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 7, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 6, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 6, 0);
+ adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
+ break;
+ case CHIP_CYAN_SKILLFISH:
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r)
+ return -EINVAL;
+
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ } else {
+ cyan_skillfish_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 5, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(2, 1, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(8, 1, 1);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 1);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(10, 1, 3);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 0, 3);
+ }
+ break;
+ default:
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r) {
+ drm_err(&adev->ddev, "discovery failed: %d\n", r);
+ return r;
+ }
+
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ break;
+ }
+
+ amdgpu_discovery_init_soc_config(adev);
+ amdgpu_discovery_sysfs_init(adev);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ adev->family = AMDGPU_FAMILY_AI;
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ adev->family = AMDGPU_FAMILY_RV;
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->family = AMDGPU_FAMILY_NV;
+ break;
+ case IP_VERSION(10, 3, 1):
+ adev->family = AMDGPU_FAMILY_VGH;
+ adev->apu_flags |= AMD_APU_IS_VANGOGH;
+ break;
+ case IP_VERSION(10, 3, 3):
+ adev->family = AMDGPU_FAMILY_YC;
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->family = AMDGPU_FAMILY_GC_10_3_6;
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->family = AMDGPU_FAMILY_GC_10_3_7;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->family = AMDGPU_FAMILY_GC_11_0_0;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->family = AMDGPU_FAMILY_GC_11_0_1;
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->family = AMDGPU_FAMILY_GC_11_5_0;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->family = AMDGPU_FAMILY_GC_12_0_0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->flags |= AMD_IS_APU;
+ break;
+ default:
+ break;
+ }
+
+ /* set NBIO version */
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 2, 0):
+ adev->nbio.funcs = &nbio_v6_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ case IP_VERSION(2, 5, 0):
+ adev->nbio.funcs = &nbio_v7_0_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 4, 0):
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
+ adev->nbio.funcs = &nbio_v7_4_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
+ adev->nbio.funcs = &nbio_v7_9_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
+ adev->nbio.funcs = &nbio_v7_11_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 2, 0):
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ case IP_VERSION(7, 5, 1):
+ adev->nbio.funcs = &nbio_v7_2_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg;
+ break;
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 3, 1):
+ case IP_VERSION(2, 3, 2):
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
+ case IP_VERSION(3, 3, 3):
+ adev->nbio.funcs = &nbio_v2_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
+ break;
+ case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
+ if (amdgpu_sriov_vf(adev))
+ adev->nbio.funcs = &nbio_v4_3_sriov_funcs;
+ else
+ adev->nbio.funcs = &nbio_v4_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 7, 0):
+ case IP_VERSION(7, 7, 1):
+ adev->nbio.funcs = &nbio_v7_7_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
+ break;
+ case IP_VERSION(6, 3, 1):
+ adev->nbio.funcs = &nbif_v6_3_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 1):
+ case IP_VERSION(4, 4, 0):
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ adev->hdp.funcs = &hdp_v4_0_funcs;
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 3):
+ case IP_VERSION(5, 0, 4):
+ case IP_VERSION(5, 2, 0):
+ adev->hdp.funcs = &hdp_v5_0_funcs;
+ break;
+ case IP_VERSION(5, 2, 1):
+ adev->hdp.funcs = &hdp_v5_2_funcs;
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 1, 0):
+ adev->hdp.funcs = &hdp_v6_0_funcs;
+ break;
+ case IP_VERSION(7, 0, 0):
+ adev->hdp.funcs = &hdp_v7_0_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, DF_HWIP, 0)) {
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(3, 6, 1):
+ case IP_VERSION(3, 6, 2):
+ adev->df.funcs = &df_v3_6_funcs;
+ break;
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 5, 2):
+ adev->df.funcs = &df_v1_7_funcs;
+ break;
+ case IP_VERSION(4, 3, 0):
+ adev->df.funcs = &df_v4_3_funcs;
+ break;
+ case IP_VERSION(4, 6, 2):
+ adev->df.funcs = &df_v4_6_2_funcs;
+ break;
+ case IP_VERSION(4, 15, 0):
+ case IP_VERSION(4, 15, 1):
+ adev->df.funcs = &df_v4_15_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, SMUIO_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(10, 0, 2):
+ adev->smuio.funcs = &smuio_v9_0_funcs;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 8):
+ adev->smuio.funcs = &smuio_v11_0_funcs;
+ break;
+ case IP_VERSION(11, 0, 6):
+ case IP_VERSION(11, 0, 10):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 9):
+ case IP_VERSION(13, 0, 10):
+ adev->smuio.funcs = &smuio_v11_0_6_funcs;
+ break;
+ case IP_VERSION(13, 0, 2):
+ adev->smuio.funcs = &smuio_v13_0_funcs;
+ break;
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 11):
+ adev->smuio.funcs = &smuio_v13_0_3_funcs;
+ if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
+ adev->flags |= AMD_IS_APU;
+ }
+ break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ adev->smuio.funcs = &smuio_v13_0_6_funcs;
+ break;
+ case IP_VERSION(14, 0, 2):
+ adev->smuio.funcs = &smuio_v14_0_2_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ adev->lsdma.funcs = &lsdma_v6_0_funcs;
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ adev->lsdma.funcs = &lsdma_v7_0_funcs;
+ break;
+ default:
+ break;
+ }
+
+ r = amdgpu_discovery_set_common_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_gmc_ip_blocks(adev);
+ if (r)
+ return r;
+
+ /* For SR-IOV, PSP needs to be initialized before IH */
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_discovery_set_psp_ip_blocks(adev);
+ if (r)
+ return r;
+ r = amdgpu_discovery_set_ih_ip_blocks(adev);
+ if (r)
+ return r;
+ } else {
+ r = amdgpu_discovery_set_ih_ip_blocks(adev);
+ if (r)
+ return r;
+
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = amdgpu_discovery_set_psp_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+ }
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = amdgpu_discovery_set_smu_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_discovery_set_display_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_gc_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_sdma_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_ras_ip_blocks(adev);
+ if (r)
+ return r;
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
+ !amdgpu_sriov_vf(adev)) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
+ r = amdgpu_discovery_set_smu_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_discovery_set_mm_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_mes_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_vpe_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_umsch_mm_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_isp_ip_blocks(adev);
+ if (r)
+ return r;
return 0;
}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 8f6183801cb3..4ce04486cc31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,13 +24,27 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
-#define DISCOVERY_TMR_SIZE (4 << 10)
+#include <linux/debugfs.h>
+
+#define DISCOVERY_TMR_SIZE (10 << 10)
#define DISCOVERY_TMR_OFFSET (64 << 10)
+struct ip_discovery_top;
+
+struct amdgpu_discovery_info {
+ struct debugfs_blob_wrapper debugfs_blob;
+ struct ip_discovery_top *ip_top;
+ uint32_t size;
+ uint8_t *bin;
+ bool reserve_tmr;
+};
+
void amdgpu_discovery_fini(struct amdgpu_device *adev);
-int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
-int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
- int *major, int *minor, int *revision);
-int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
+int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
+
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+ uint32_t *nps_type,
+ struct amdgpu_gmc_memrange **ranges,
+ int *range_cnt, bool refresh);
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 48cb33e5b382..b5d34797d606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -30,17 +30,64 @@
#include "atom.h"
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
+#include "soc15_common.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "bif/bif_4_1_d.h"
#include <asm/div64.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_drv.h>
#include <drm/drm_edid.h>
-#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
#include <drm/drm_vblank.h>
+/**
+ * amdgpu_display_hotplug_work_func - work handler for display hotplug event
+ *
+ * @work: work struct pointer
+ *
+ * This is the hotplug event work handler (all ASICs).
+ * The work gets scheduled from the IRQ handler if there
+ * was a hotplug interrupt. It walks through the connector table
+ * and calls hotplug handler for each connector. After this, it sends
+ * a DRM hotplug event to alert userspace.
+ *
+ * This design approach is required in order to defer hotplug event handling
+ * from the IRQ handler to a work handler because hotplug handler has to use
+ * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
+ * sleep).
+ */
+void amdgpu_display_hotplug_work_func(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ hotplug_work.work);
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_mode_config *mode_config = &dev->mode_config;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+
+ mutex_lock(&mode_config->mutex);
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ amdgpu_connector_hotplug(connector);
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&mode_config->mutex);
+ /* Just fire off a uevent and let userspace tell us what to do */
+ drm_helper_hpd_irq_event(dev);
+}
+
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj);
+
static void amdgpu_display_flip_callback(struct dma_fence *f,
struct dma_fence_cb *cb)
{
@@ -54,7 +101,7 @@ static void amdgpu_display_flip_callback(struct dma_fence *f,
static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
struct dma_fence **f)
{
- struct dma_fence *fence= *f;
+ struct dma_fence *fence = *f;
if (fence == NULL)
return false;
@@ -80,12 +127,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work)
struct drm_crtc *crtc = &amdgpu_crtc->base;
unsigned long flags;
- unsigned i;
+ unsigned int i;
int vpos, hpos;
- if (amdgpu_display_flip_handle_fence(work, &work->excl))
- return;
-
for (i = 0; i < work->shared_count; ++i)
if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
return;
@@ -116,8 +160,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work)
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
- DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
- amdgpu_crtc->crtc_id, amdgpu_crtc, work);
+ drm_dbg_vbl(adev_to_drm(adev),
+ "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, work);
}
@@ -159,7 +204,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
u64 tiling_flags;
int i, r;
- work = kzalloc(sizeof *work, GFP_KERNEL);
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
if (work == NULL)
return -ENOMEM;
@@ -189,6 +234,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
}
if (!adev->enable_virtual_display) {
+ new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(new_abo,
amdgpu_display_supported_domains(adev, new_abo->flags));
if (unlikely(r != 0)) {
@@ -203,9 +249,9 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto unpin;
}
- r = dma_resv_get_fences_rcu(new_abo->tbo.base.resv, &work->excl,
- &work->shared_count,
- &work->shared);
+ r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+ &work->shared_count,
+ &work->shared);
if (unlikely(r != 0)) {
DRM_ERROR("failed to get fences for buffer\n");
goto unpin;
@@ -254,7 +300,6 @@ unreserve:
cleanup:
amdgpu_bo_unref(&work->old_abo);
- dma_fence_put(work->excl);
for (i = 0; i < work->shared_count; ++i)
dma_fence_put(work->shared[i]);
kfree(work->shared);
@@ -287,22 +332,19 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
if (crtc->enabled)
active = true;
- pm_runtime_mark_last_busy(dev->dev);
-
adev = drm_to_adev(dev);
/* if we have active crtcs and we don't have a power ref,
- take the current one */
+ * take the current one
+ */
if (active && !adev->have_disp_power_ref) {
adev->have_disp_power_ref = true;
return ret;
}
- /* if we have no active crtcs, then drop the power ref
- we got before */
- if (!active && adev->have_disp_power_ref) {
- pm_runtime_put_autosuspend(dev->dev);
+ /* if we have no active crtcs, then go to
+ * drop the power ref we got before
+ */
+ if (!active && adev->have_disp_power_ref)
adev->have_disp_power_ref = false;
- }
-
out:
/* drop the power reference we got coming in here */
pm_runtime_put_autosuspend(dev->dev);
@@ -466,11 +508,10 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
if (amdgpu_connector->router.ddc_valid)
amdgpu_i2c_router_select_ddc_port(amdgpu_connector);
- if (use_aux) {
+ if (use_aux)
ret = i2c_transfer(&amdgpu_connector->ddc_bus->aux.ddc, msgs, 2);
- } else {
+ else
ret = i2c_transfer(&amdgpu_connector->ddc_bus->adapter, msgs, 2);
- }
if (ret != 2)
/* Couldn't find an accessible DDC on this connector */
@@ -479,20 +520,40 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
* EDID header starts with:
* 0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00.
* Only the first 6 bytes must be valid as
- * drm_edid_block_valid() can fix the last 2 bytes */
+ * drm_edid_block_valid() can fix the last 2 bytes
+ */
if (drm_edid_header_is_valid(buf) < 6) {
/* Couldn't find an accessible EDID on this
- * connector */
+ * connector
+ */
return false;
}
return true;
}
+static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file,
+ unsigned int flags, unsigned int color,
+ struct drm_clip_rect *clips, unsigned int num_clips)
+{
+
+ if (file)
+ return -ENOSYS;
+
+ return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips,
+ num_clips);
+}
+
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
.destroy = drm_gem_fb_destroy,
.create_handle = drm_gem_fb_create_handle,
};
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+ .dirty = amdgpu_dirtyfb
+};
+
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
uint64_t bo_flags)
{
@@ -509,27 +570,9 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
*/
if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
amdgpu_bo_support_uswc(bo_flags) &&
- amdgpu_device_asic_has_dc_support(adev->asic_type)) {
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- domain |= AMDGPU_GEM_DOMAIN_GTT;
- break;
- case CHIP_RAVEN:
- /* enable S/G on PCO and RV2 */
- if ((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
- (adev->apu_flags & AMD_APU_IS_PICASSO))
- domain |= AMDGPU_GEM_DOMAIN_GTT;
- break;
- case CHIP_RENOIR:
- case CHIP_VANGOGH:
- domain |= AMDGPU_GEM_DOMAIN_GTT;
- break;
-
- default:
- break;
- }
- }
+ adev->dc_enabled &&
+ adev->mode_info.gpu_vm_support)
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
#endif
return domain;
@@ -611,6 +654,10 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier)
if (!IS_AMD_FMT_MOD(modifier))
return NULL;
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) < AMD_FMT_MOD_TILE_VER_GFX9 ||
+ AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12)
+ return NULL;
+
if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
return lookup_format_info(dcc_retile_formats,
ARRAY_SIZE(dcc_retile_formats),
@@ -675,10 +722,39 @@ extract_render_dcc_offset(struct amdgpu_device *adev,
return 0;
}
+static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb)
+{
+ u64 modifier = 0;
+ int swizzle_mode = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE);
+
+ if (!swizzle_mode) {
+ modifier = DRM_FORMAT_MOD_LINEAR;
+ } else {
+ int max_comp_block =
+ AMDGPU_TILING_GET(afb->tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+
+ modifier =
+ AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12) |
+ AMD_FMT_MOD_SET(TILE, swizzle_mode) |
+ AMD_FMT_MOD_SET(DCC, afb->gfx12_dcc) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block);
+ }
+
+ afb->base.modifier = modifier;
+ afb->base.flags |= DRM_MODE_FB_MODIFIERS;
+ return 0;
+}
+
static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
{
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
uint64_t modifier = 0;
+ int num_pipes = 0;
+ int num_pkrs = 0;
+
+ num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
+ num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes;
if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) {
modifier = DRM_FORMAT_MOD_LINEAR;
@@ -691,7 +767,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
int bank_xor_bits = 0;
int packers = 0;
int rb = 0;
- int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
+ int pipes = ilog2(num_pipes);
uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
switch (swizzle >> 2) {
@@ -707,14 +783,21 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 6: /* 64 KiB _X */
block_size_bits = 16;
break;
+ case 7: /* 256 KiB */
+ block_size_bits = 18;
+ break;
default:
/* RESERVED or VAR */
return -EINVAL;
}
- if (adev->asic_type >= CHIP_SIENNA_CICHLID)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX11;
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(10, 3, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
- else if (adev->family == AMDGPU_FAMILY_NV)
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(10, 0, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10;
else
version = AMD_FMT_MOD_TILE_VER_GFX9;
@@ -723,19 +806,34 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 0: /* Z microtiling */
return -EINVAL;
case 1: /* S microtiling */
- if (!has_xor)
- version = AMD_FMT_MOD_TILE_VER_GFX9;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0)) {
+ if (!has_xor)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
break;
case 2:
- if (!has_xor && afb->base.format->cpp[0] != 4)
- version = AMD_FMT_MOD_TILE_VER_GFX9;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0)) {
+ if (!has_xor && afb->base.format->cpp[0] != 4)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
break;
case 3:
break;
}
if (has_xor) {
+ if (num_pipes == num_pkrs && num_pkrs == 0) {
+ DRM_ERROR("invalid number of pipes and packers\n");
+ return -EINVAL;
+ }
+
switch (version) {
+ case AMD_FMT_MOD_TILE_VER_GFX11:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
+ break;
case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
pipe_xor_bits = min(block_size_bits - 8, pipes);
packers = min(block_size_bits - 8 - pipe_xor_bits,
@@ -769,9 +867,12 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
u64 render_dcc_offset;
/* Enable constant encode on RAVEN2 and later. */
- bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN ||
- (adev->asic_type == CHIP_RAVEN &&
- adev->external_rev_id >= 0x81);
+ bool dcc_constant_encode =
+ (adev->asic_type > CHIP_RAVEN ||
+ (adev->asic_type == CHIP_RAVEN &&
+ adev->external_rev_id >= 0x81)) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0);
int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B :
dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B :
@@ -808,7 +909,9 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
if (adev->family >= AMDGPU_FAMILY_NV) {
int extra_pipe = 0;
- if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
+ if ((amdgpu_ip_version(adev, GC_HWIP,
+ 0) >=
+ IP_VERSION(10, 3, 0)) &&
pipes == packers && pipes > 1)
extra_pipe = 1;
@@ -837,8 +940,227 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
return 0;
}
+/* Mirrors the is_displayable check in radeonsi's gfx6_compute_surface */
+static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb)
+{
+ u64 micro_tile_mode;
+
+ if (AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) == 1) /* LINEAR_ALIGNED */
+ return 0;
+
+ micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE);
+ switch (micro_tile_mode) {
+ case 0: /* DISPLAY */
+ case 3: /* RENDER */
+ return 0;
+ default:
+ drm_dbg_kms(afb->base.dev,
+ "Micro tile mode %llu not supported for scanout\n",
+ micro_tile_mode);
+ return -EINVAL;
+ }
+}
+
+static void get_block_dimensions(unsigned int block_log2, unsigned int cpp,
+ unsigned int *width, unsigned int *height)
+{
+ unsigned int cpp_log2 = ilog2(cpp);
+ unsigned int pixel_log2 = block_log2 - cpp_log2;
+ unsigned int width_log2 = (pixel_log2 + 1) / 2;
+ unsigned int height_log2 = pixel_log2 - width_log2;
+
+ *width = 1 << width_log2;
+ *height = 1 << height_log2;
+}
+
+static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
+ bool pipe_aligned)
+{
+ unsigned int ver = AMD_FMT_MOD_GET(TILE_VERSION, modifier);
+
+ switch (ver) {
+ case AMD_FMT_MOD_TILE_VER_GFX9: {
+ /*
+ * TODO: for pipe aligned we may need to check the alignment of the
+ * total size of the surface, which may need to be bigger than the
+ * natural alignment due to some HW workarounds
+ */
+ return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
+ }
+ case AMD_FMT_MOD_TILE_VER_GFX10:
+ case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
+ case AMD_FMT_MOD_TILE_VER_GFX11: {
+ int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
+
+ if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
+ AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
+ ++pipes_log2;
+
+ return max(8 + (pipe_aligned ? pipes_log2 : 0), 12);
+ }
+ default:
+ return 0;
+ }
+}
+
+static int amdgpu_display_verify_plane(struct amdgpu_framebuffer *rfb, int plane,
+ const struct drm_format_info *format,
+ unsigned int block_width, unsigned int block_height,
+ unsigned int block_size_log2)
+{
+ unsigned int width = rfb->base.width /
+ ((plane && plane < format->num_planes) ? format->hsub : 1);
+ unsigned int height = rfb->base.height /
+ ((plane && plane < format->num_planes) ? format->vsub : 1);
+ unsigned int cpp = plane < format->num_planes ? format->cpp[plane] : 1;
+ unsigned int block_pitch = block_width * cpp;
+ unsigned int min_pitch = ALIGN(width * cpp, block_pitch);
+ unsigned int block_size = 1 << block_size_log2;
+ uint64_t size;
+
+ if (rfb->base.pitches[plane] % block_pitch) {
+ drm_dbg_kms(rfb->base.dev,
+ "pitch %d for plane %d is not a multiple of block pitch %d\n",
+ rfb->base.pitches[plane], plane, block_pitch);
+ return -EINVAL;
+ }
+ if (rfb->base.pitches[plane] < min_pitch) {
+ drm_dbg_kms(rfb->base.dev,
+ "pitch %d for plane %d is less than minimum pitch %d\n",
+ rfb->base.pitches[plane], plane, min_pitch);
+ return -EINVAL;
+ }
+
+ /* Force at least natural alignment. */
+ if (rfb->base.offsets[plane] % block_size) {
+ drm_dbg_kms(rfb->base.dev,
+ "offset 0x%x for plane %d is not a multiple of block pitch 0x%x\n",
+ rfb->base.offsets[plane], plane, block_size);
+ return -EINVAL;
+ }
+
+ size = rfb->base.offsets[plane] +
+ (uint64_t)rfb->base.pitches[plane] / block_pitch *
+ block_size * DIV_ROUND_UP(height, block_height);
+
+ if (rfb->base.obj[0]->size < size) {
+ drm_dbg_kms(rfb->base.dev,
+ "BO size 0x%zx is less than 0x%llx required for plane %d\n",
+ rfb->base.obj[0]->size, size, plane);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
+{
+ const struct drm_format_info *format_info = drm_format_info(rfb->base.format->format);
+ uint64_t modifier = rfb->base.modifier;
+ int ret;
+ unsigned int i, block_width, block_height, block_size_log2;
+
+ if (rfb->base.dev->mode_config.fb_modifiers_not_supported)
+ return 0;
+
+ for (i = 0; i < format_info->num_planes; ++i) {
+ if (modifier == DRM_FORMAT_MOD_LINEAR) {
+ block_width = 256 / format_info->cpp[i];
+ block_height = 1;
+ block_size_log2 = 8;
+ } else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+ switch (swizzle) {
+ case AMD_FMT_MOD_TILE_GFX12_256B_2D:
+ block_size_log2 = 8;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_4K_2D:
+ block_size_log2 = 12;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_64K_2D:
+ block_size_log2 = 16;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_256K_2D:
+ block_size_log2 = 18;
+ break;
+ default:
+ drm_dbg_kms(rfb->base.dev,
+ "Gfx12 swizzle mode with unknown block size: %d\n", swizzle);
+ return -EINVAL;
+ }
+
+ get_block_dimensions(block_size_log2, format_info->cpp[i],
+ &block_width, &block_height);
+ } else {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+ switch ((swizzle & ~3) + 1) {
+ case DC_SW_256B_S:
+ block_size_log2 = 8;
+ break;
+ case DC_SW_4KB_S:
+ case DC_SW_4KB_S_X:
+ block_size_log2 = 12;
+ break;
+ case DC_SW_64KB_S:
+ case DC_SW_64KB_S_T:
+ case DC_SW_64KB_S_X:
+ block_size_log2 = 16;
+ break;
+ case DC_SW_VAR_S_X:
+ block_size_log2 = 18;
+ break;
+ default:
+ drm_dbg_kms(rfb->base.dev,
+ "Swizzle mode with unknown block size: %d\n", swizzle);
+ return -EINVAL;
+ }
+
+ get_block_dimensions(block_size_log2, format_info->cpp[i],
+ &block_width, &block_height);
+ }
+
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height, block_size_log2);
+ if (ret)
+ return ret;
+ }
+
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 &&
+ AMD_FMT_MOD_GET(DCC, modifier)) {
+ if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
+ block_size_log2 = get_dcc_block_size(modifier, false, false);
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+ &block_width, &block_height);
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height,
+ block_size_log2);
+ if (ret)
+ return ret;
+
+ ++i;
+ block_size_log2 = get_dcc_block_size(modifier, true, true);
+ } else {
+ bool pipe_aligned = AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
+
+ block_size_log2 = get_dcc_block_size(modifier, true, pipe_aligned);
+ }
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+ &block_width, &block_height);
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height, block_size_log2);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
- uint64_t *tiling_flags, bool *tmz_surface)
+ uint64_t *tiling_flags, bool *tmz_surface,
+ bool *gfx12_dcc)
{
struct amdgpu_bo *rbo;
int r;
@@ -846,6 +1168,7 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
if (!amdgpu_fb) {
*tiling_flags = 0;
*tmz_surface = false;
+ *gfx12_dcc = false;
return 0;
}
@@ -859,28 +1182,64 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
return r;
}
- if (tiling_flags)
- amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
-
- if (tmz_surface)
- *tmz_surface = amdgpu_bo_encrypted(rbo);
+ amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
+ *tmz_surface = amdgpu_bo_encrypted(rbo);
+ *gfx12_dcc = rbo->flags & AMDGPU_GEM_CREATE_GFX12_DCC;
amdgpu_bo_unreserve(rbo);
return r;
}
-int amdgpu_display_framebuffer_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj)
+static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ struct drm_file *file_priv,
+ const struct drm_format_info *info,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
{
- int ret, i;
+ int ret;
+
rfb->base.obj[0] = obj;
- drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
- ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+ drm_helper_mode_fill_fb_struct(dev, &rfb->base, info, mode_cmd);
+ /* Verify that the modifier is supported. */
+ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
+ mode_cmd->modifier[0])) {
+ drm_dbg_kms(dev,
+ "unsupported pixel format %p4cc / modifier 0x%llx\n",
+ &mode_cmd->pixel_format, mode_cmd->modifier[0]);
+
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
+ if (ret)
+ goto err;
+
+ if (drm_drv_uses_atomic_modeset(dev))
+ ret = drm_framebuffer_init(dev, &rfb->base,
+ &amdgpu_fb_funcs_atomic);
+ else
+ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+
if (ret)
- goto fail;
+ goto err;
+
+ return 0;
+err:
+ drm_dbg_kms(dev, "Failed to verify and init gem fb: %d\n", ret);
+ rfb->base.obj[0] = NULL;
+ return ret;
+}
+
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int ret, i;
/*
* This needs to happen before modifier conversion as that might change
@@ -891,39 +1250,53 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
drm_dbg_kms(dev, "Plane 0 and %d have different BOs: %u vs. %u\n",
i, mode_cmd->handles[0], mode_cmd->handles[i]);
ret = -EINVAL;
- goto fail;
+ return ret;
}
}
- ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface);
+ ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface,
+ &rfb->gfx12_dcc);
if (ret)
- goto fail;
+ return ret;
- if (dev->mode_config.allow_fb_modifiers &&
+ if (dev->mode_config.fb_modifiers_not_supported && !adev->enable_virtual_display) {
+ drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI,
+ "GFX9+ requires FB check based on format modifier\n");
+ ret = check_tiling_flags_gfx6(rfb);
+ if (ret)
+ return ret;
+ }
+
+ if (!dev->mode_config.fb_modifiers_not_supported &&
!(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
- ret = convert_tiling_flags_to_modifier(rfb);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0))
+ ret = convert_tiling_flags_to_modifier_gfx12(rfb);
+ else
+ ret = convert_tiling_flags_to_modifier(rfb);
+
if (ret) {
drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier",
rfb->tiling_flags);
- goto fail;
+ return ret;
}
}
- for (i = 1; i < rfb->base.format->num_planes; ++i) {
+ ret = amdgpu_display_verify_sizes(rfb);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < rfb->base.format->num_planes; ++i) {
+ drm_gem_object_get(rfb->base.obj[0]);
rfb->base.obj[i] = rfb->base.obj[0];
- drm_gem_object_get(rfb->base.obj[i]);
}
return 0;
-
-fail:
- rfb->base.obj[0] = NULL;
- return ret;
}
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
+ const struct drm_format_info *info,
const struct drm_mode_fb_cmd2 *mode_cmd)
{
struct amdgpu_framebuffer *amdgpu_fb;
@@ -934,16 +1307,19 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]);
if (obj == NULL) {
- drm_dbg_kms(dev, "No GEM object associated to handle 0x%08X, "
- "can't create framebuffer\n", mode_cmd->handles[0]);
+ drm_dbg_kms(dev,
+ "No GEM object associated to handle 0x%08X, can't create framebuffer\n",
+ mode_cmd->handles[0]);
+
return ERR_PTR(-ENOENT);
}
/* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
bo = gem_to_amdgpu_bo(obj);
domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags);
- if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ if (drm_gem_is_imported(obj) && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n");
+ drm_gem_object_put(obj);
return ERR_PTR(-EINVAL);
}
@@ -953,39 +1329,98 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
return ERR_PTR(-ENOMEM);
}
- ret = amdgpu_display_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj);
+ ret = amdgpu_display_gem_fb_verify_and_init(dev, amdgpu_fb, file_priv,
+ info, mode_cmd, obj);
if (ret) {
kfree(amdgpu_fb);
drm_gem_object_put(obj);
return ERR_PTR(ret);
}
+ drm_gem_object_put(obj);
return &amdgpu_fb->base;
}
const struct drm_mode_config_funcs amdgpu_mode_funcs = {
.fb_create = amdgpu_display_user_framebuffer_create,
- .output_poll_changed = drm_fb_helper_output_poll_changed,
};
-static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] =
-{ { UNDERSCAN_OFF, "off" },
+static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = {
+ { UNDERSCAN_OFF, "off" },
{ UNDERSCAN_ON, "on" },
{ UNDERSCAN_AUTO, "auto" },
};
-static const struct drm_prop_enum_list amdgpu_audio_enum_list[] =
-{ { AMDGPU_AUDIO_DISABLE, "off" },
+static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = {
+ { AMDGPU_AUDIO_DISABLE, "off" },
{ AMDGPU_AUDIO_ENABLE, "on" },
{ AMDGPU_AUDIO_AUTO, "auto" },
};
/* XXX support different dither options? spatial, temporal, both, etc. */
-static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
-{ { AMDGPU_FMT_DITHER_DISABLE, "off" },
+static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
+ { AMDGPU_FMT_DITHER_DISABLE, "off" },
{ AMDGPU_FMT_DITHER_ENABLE, "on" },
};
+/**
+ * DOC: property for adaptive backlight modulation
+ *
+ * The 'adaptive backlight modulation' property is used for the compositor to
+ * directly control the adaptive backlight modulation power savings feature
+ * that is part of DCN hardware.
+ *
+ * The property will be attached specifically to eDP panels that support it.
+ *
+ * The property is by default set to 'sysfs' to allow the sysfs file 'panel_power_savings'
+ * to be able to control it.
+ * If set to 'off' the compositor will ensure it stays off.
+ * The other values 'min', 'bias min', 'bias max', and 'max' will control the
+ * intensity of the power savings.
+ *
+ * Modifying this value can have implications on color accuracy, so tread
+ * carefully.
+ */
+static int amdgpu_display_setup_abm_prop(struct amdgpu_device *adev)
+{
+ const struct drm_prop_enum_list props[] = {
+ { ABM_SYSFS_CONTROL, "sysfs" },
+ { ABM_LEVEL_OFF, "off" },
+ { ABM_LEVEL_MIN, "min" },
+ { ABM_LEVEL_BIAS_MIN, "bias min" },
+ { ABM_LEVEL_BIAS_MAX, "bias max" },
+ { ABM_LEVEL_MAX, "max" },
+ };
+ struct drm_property *prop;
+ int i;
+
+ if (!adev->dc_enabled)
+ return 0;
+
+ prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_ENUM,
+ "adaptive backlight modulation",
+ 6);
+ if (!prop)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(props); i++) {
+ int ret;
+
+ ret = drm_property_add_enum(prop, props[i].type,
+ props[i].name);
+
+ if (ret) {
+ drm_property_destroy(adev_to_drm(adev), prop);
+
+ return ret;
+ }
+ }
+
+ adev->mode_info.abm_level_property = prop;
+
+ return 0;
+}
+
int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
{
int sz;
@@ -1032,15 +1467,7 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
"dither",
amdgpu_dither_enum_list, sz);
- if (amdgpu_device_has_dc_support(adev)) {
- adev->mode_info.abm_level_property =
- drm_property_create_range(adev_to_drm(adev), 0,
- "abm level", 0, 4);
- if (!adev->mode_info.abm_level_property)
- return -ENOMEM;
- }
-
- return 0;
+ return amdgpu_display_setup_abm_prop(adev);
}
void amdgpu_display_update_priority(struct amdgpu_device *adev)
@@ -1107,7 +1534,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) &&
((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
- drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
+ connector && connector->display_info.is_hdmi &&
amdgpu_display_is_hdtv_mode(mode)))) {
if (amdgpu_encoder->underscan_hborder != 0)
amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
@@ -1126,6 +1553,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
}
if (amdgpu_crtc->rmx_type != RMX_OFF) {
fixed20_12 a, b;
+
a.full = dfixed_const(src_v);
b.full = dfixed_const(dst_v);
amdgpu_crtc->vsc.full = dfixed_div(a, b);
@@ -1145,7 +1573,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
*
* \param dev Device to query.
* \param pipe Crtc to query.
- * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ * \param flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
* For driver internal use only also supports these flags:
*
* USE_REAL_VBLANKSTART to use the real start of vblank instead
@@ -1212,8 +1640,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
ret |= DRM_SCANOUTPOS_ACCURATE;
vbl_start = vbl & 0x1fff;
vbl_end = (vbl >> 16) & 0x1fff;
- }
- else {
+ } else {
/* No: Fake something reasonable which gives at least ok results. */
vbl_start = mode->crtc_vdisplay;
vbl_end = 0;
@@ -1221,8 +1648,8 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
/* Called from driver internal vblank counter query code? */
if (flags & GET_DISTANCE_TO_VBLANKSTART) {
- /* Caller wants distance from real vbl_start in *hpos */
- *hpos = *vpos - vbl_start;
+ /* Caller wants distance from real vbl_start in *hpos */
+ *hpos = *vpos - vbl_start;
}
/* Fudge vblank to start a few scanlines earlier to handle the
@@ -1244,7 +1671,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
/* In vblank? */
if (in_vbl)
- ret |= DRM_SCANOUTPOS_IN_VBLANK;
+ ret |= DRM_SCANOUTPOS_IN_VBLANK;
/* Called from driver internal vblank counter query code? */
if (flags & GET_DISTANCE_TO_VBLANKSTART) {
@@ -1310,3 +1737,192 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
stime, etime, mode);
}
+
+static bool
+amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_fb_helper *fb_helper = dev->fb_helper;
+
+ if (!fb_helper || !fb_helper->buffer)
+ return false;
+
+ if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
+ return false;
+
+ return true;
+}
+
+int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_crtc *crtc;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ int r;
+
+ drm_kms_helper_poll_disable(dev);
+
+ /* turn off display hw */
+ drm_modeset_lock_all(dev);
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_OFF);
+ drm_connector_list_iter_end(&iter);
+ drm_modeset_unlock_all(dev);
+ /* unpin the front buffers and cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_framebuffer *fb = crtc->primary->fb;
+ struct amdgpu_bo *robj;
+
+ if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+
+ r = amdgpu_bo_reserve(aobj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ }
+
+ if (!fb || !fb->obj[0])
+ continue;
+
+ robj = gem_to_amdgpu_bo(fb->obj[0]);
+ if (!amdgpu_display_robj_is_fb(adev, robj)) {
+ r = amdgpu_bo_reserve(robj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(robj);
+ amdgpu_bo_unreserve(robj);
+ }
+ }
+ }
+ return 0;
+}
+
+int amdgpu_display_resume_helper(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct drm_crtc *crtc;
+ int r;
+
+ /* pin cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+
+ r = amdgpu_bo_reserve(aobj, true);
+ if (r == 0) {
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r != 0)
+ dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ }
+ }
+
+ drm_helper_resume_force_mode(dev);
+
+ /* turn on display hw */
+ drm_modeset_lock_all(dev);
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_ON);
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock_all(dev);
+
+ drm_kms_helper_poll_enable(dev);
+
+ return 0;
+}
+
+/* panic_bo is set in amdgpu_dm_plane_get_scanout_buffer() and only used in amdgpu_dm_set_pixel()
+ * they are called from the panic handler, and protected by the drm_panic spinlock.
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use the indirect MMIO to write each pixel to the GPU VRAM,
+ * This is a simplified version of amdgpu_device_mm_access()
+ */
+static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb,
+ unsigned int x,
+ unsigned int y,
+ u32 color)
+{
+ struct amdgpu_res_cursor cursor;
+ unsigned long offset;
+ struct amdgpu_bo *abo = panic_abo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ uint32_t tmp;
+
+ offset = x * 4 + y * sb->pitch[0];
+ amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+ tmp = cursor.start >> 31;
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+ if (tmp != 0xffffffff)
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ WREG32_NO_KIQ(mmMM_DATA, color);
+}
+
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb)
+{
+ struct amdgpu_bo *abo;
+ struct drm_framebuffer *fb = plane->state->fb;
+
+ if (!fb)
+ return -EINVAL;
+
+ DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ if (!abo)
+ return -EINVAL;
+
+ sb->width = fb->width;
+ sb->height = fb->height;
+ /* Use the generic linear format, because tiling will be disabled in panic_flush() */
+ sb->format = drm_format_info(fb->format->format);
+ if (!sb->format)
+ return -EINVAL;
+
+ sb->pitch[0] = fb->pitches[0];
+
+ if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+ if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+ drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+ return -EINVAL;
+ }
+ /* Only handle 32bits format, to simplify mmio access */
+ if (fb->format->cpp[0] != 4) {
+ drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+ return -EINVAL;
+ }
+ sb->set_pixel = amdgpu_display_set_pixel;
+ panic_abo = abo;
+ return 0;
+ }
+ if (!abo->kmap.virtual &&
+ ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+ drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+ return -ENOMEM;
+ }
+ if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+ iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+ else
+ iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index dc7b7d116549..49a29bf47a37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,6 +23,8 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
+#include <drm/drm_panic.h>
+
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
@@ -35,16 +37,29 @@
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
-int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
+void amdgpu_display_hotplug_work_func(struct work_struct *work);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
uint64_t bo_flags);
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
+ const struct drm_format_info *info,
const struct drm_mode_fb_cmd2 *mode_cmd);
const struct drm_format_info *
amdgpu_lookup_format_info(u32 format, uint64_t modifier);
+int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
+int amdgpu_display_resume_helper(struct amdgpu_device *adev);
+
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb);
+
+#define ABM_SYSFS_CONTROL -1
+#define ABM_LEVEL_OFF 0
+#define ABM_LEVEL_MIN 1
+#define ABM_LEVEL_BIAS_MIN 2
+#define ABM_LEVEL_BIAS_MAX 3
+#define ABM_LEVEL_MAX 4
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 47e0b48dc26f..e22cfa7c6d32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -36,98 +36,34 @@
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
#include <drm/amdgpu_drm.h>
+#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
#include <linux/pci-p2pdma.h>
-#include <linux/pm_runtime.h>
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops;
/**
- * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
- * @obj: GEM BO
- * @vma: Virtual memory area
+ * dma_buf_attach_adev - Helper to get adev of an attachment
*
- * Sets up a userspace mapping of the BO's memory in the given
- * virtual memory area.
+ * @attach: attachment
*
* Returns:
- * 0 on success or a negative error code on failure.
+ * A struct amdgpu_device * if the attaching device is an amdgpu device or
+ * partition, NULL otherwise.
*/
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
- struct vm_area_struct *vma)
-{
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- unsigned asize = amdgpu_bo_size(bo);
- int ret;
-
- if (!vma->vm_file)
- return -ENODEV;
-
- if (adev == NULL)
- return -ENODEV;
-
- /* Check for valid size. */
- if (asize < vma->vm_end - vma->vm_start)
- return -EINVAL;
-
- if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
- (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
- return -EPERM;
- }
- vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
-
- /* prime mmap does not need to check access, so allow here */
- ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
- if (ret)
- return ret;
-
- ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
- drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
-
- return ret;
-}
-
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
+static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach)
{
- struct dma_fence **fences;
- unsigned int count;
- int r;
-
- if (!dma_resv_get_list(obj)) /* no shared fences to convert */
- return 0;
-
- r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences);
- if (r)
- return r;
-
- if (count == 0) {
- /* Now that was unexpected. */
- } else if (count == 1) {
- dma_resv_add_excl_fence(obj, fences[0]);
- dma_fence_put(fences[0]);
- kfree(fences);
- } else {
- struct dma_fence_array *array;
-
- array = dma_fence_array_create(count, fences,
- dma_fence_context_alloc(1), 0,
- false);
- if (!array)
- goto err_fences_put;
+ if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) {
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- dma_resv_add_excl_fence(obj, &array->base);
- dma_fence_put(&array->base);
+ return amdgpu_ttm_adev(bo->tbo.bdev);
}
- return 0;
-
-err_fences_put:
- while (count--)
- dma_fence_put(fences[count]);
- kfree(fences);
- return -ENOMEM;
+ return NULL;
}
/**
@@ -141,66 +77,49 @@ err_fences_put:
static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct dma_buf_attachment *attach)
{
+ struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach);
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
- if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0)
+ /*
+ * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
+ * Such buffers cannot be safely accessed over P2P due to device-local
+ * compression metadata. Fallback to system-memory path instead.
+ * Device supports GFX12 (GC 12.x or newer)
+ * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag
+ *
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
+ bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
attach->peer2peer = false;
- if (attach->dev->driver == adev->dev->driver)
- return 0;
-
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0)
- goto out;
-
- r = amdgpu_bo_reserve(bo, false);
- if (unlikely(r != 0))
- goto out;
-
/*
- * We only create shared fences for internal use, but importers
- * of the dmabuf rely on exclusive fences for implicitly
- * tracking write hazards. As any of the current fences may
- * correspond to a write, we need to convert all existing
- * fences on the reservation object into a single exclusive
- * fence.
+ * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
+ * Such buffers cannot be safely accessed over P2P due to device-local
+ * compression metadata. Fallback to system-memory path instead.
+ * Device supports GFX12 (GC 12.x or newer)
+ * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag
+ *
*/
- r = __dma_resv_make_exclusive(bo->tbo.base.resv);
- if (r)
- goto out;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
+ bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+ attach->peer2peer = false;
- bo->prime_shared_count++;
- amdgpu_bo_unreserve(bo);
- return 0;
+ if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) &&
+ pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
+ attach->peer2peer = false;
-out:
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
-}
+ r = dma_resv_lock(bo->tbo.base.resv, NULL);
+ if (r)
+ return r;
-/**
- * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
- *
- * @dmabuf: DMA-buf where we remove the attachment from
- * @attach: the attachment to remove
- *
- * Called when an attachment is removed from the DMA-buf.
- */
-static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
- struct dma_buf_attachment *attach)
-{
- struct drm_gem_object *obj = dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ amdgpu_vm_bo_update_shared(bo);
- if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
- bo->prime_shared_count--;
+ dma_resv_unlock(bo->tbo.base.resv);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return 0;
}
/**
@@ -212,11 +131,35 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
*/
static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = attach->dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct dma_buf *dmabuf = attach->dmabuf;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv);
+ u32 domains = bo->allowed_domains;
+
+ dma_resv_assert_held(dmabuf->resv);
+
+ /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP
+ * support if all attachments can do P2P. If any attachment can't do
+ * P2P just pin into GTT instead.
+ *
+ * To avoid with conflicting pinnings between GPUs and RDMA when move
+ * notifiers are disabled, only allow pinning in VRAM when move
+ * notiers are enabled.
+ */
+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ } else {
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ if (!attach->peer2peer)
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ }
+
+ if (domains & AMDGPU_GEM_DOMAIN_VRAM)
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ if (WARN_ON(!domains))
+ return -EINVAL;
- /* pin buffer into GTT */
- return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ return amdgpu_bo_pin(bo, domains);
}
/**
@@ -260,7 +203,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
if (!bo->tbo.pin_count) {
/* move buffer into GTT or VRAM */
struct ttm_operation_ctx ctx = { false, false };
- unsigned domains = AMDGPU_GEM_DOMAIN_GTT;
+ unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;
if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
attach->peer2peer) {
@@ -271,13 +214,9 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return ERR_PTR(r);
-
- } else if (!(amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type) &
- AMDGPU_GEM_DOMAIN_GTT)) {
- return ERR_PTR(-EBUSY);
}
- switch (bo->tbo.mem.mem_type) {
+ switch (bo->tbo.resource->mem_type) {
case TTM_PL_TT:
sgt = drm_prime_pages_to_sg(obj->dev,
bo->tbo.ttm->pages,
@@ -291,7 +230,13 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
break;
case TTM_PL_VRAM:
- r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, attach->dev,
+ /* XGMI-accessible memory should never be DMA-mapped */
+ if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible(
+ dma_buf_attach_adev(attach), bo)))
+ return ERR_PTR(-EINVAL);
+
+ r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
+ bo->tbo.base.size, attach->dev,
dir, &sgt);
if (r)
return ERR_PTR(r);
@@ -321,17 +266,12 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- struct dma_buf *dma_buf = attach->dmabuf;
- struct drm_gem_object *obj = dma_buf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
- if (sgt->sgl->page_link) {
+ if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
} else {
- amdgpu_vram_mgr_free_sgt(adev, attach->dev, dir, sgt);
+ amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
}
}
@@ -376,9 +316,38 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ int ret;
+
+ /*
+ * Pin to keep buffer in place while it's vmap'ed. The actual
+ * domain is not that important as long as it's mapable. Using
+ * GTT and VRAM should be compatible with most use cases.
+ */
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM);
+ if (ret)
+ return ret;
+ ret = drm_gem_dmabuf_vmap(dma_buf, map);
+ if (ret)
+ amdgpu_bo_unpin(bo);
+
+ return ret;
+}
+
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ drm_gem_dmabuf_vunmap(dma_buf, map);
+ amdgpu_bo_unpin(bo);
+}
+
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
- .detach = amdgpu_dma_buf_detach,
.pin = amdgpu_dma_buf_pin,
.unpin = amdgpu_dma_buf_unpin,
.map_dma_buf = amdgpu_dma_buf_map,
@@ -386,8 +355,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
- .vmap = drm_gem_dmabuf_vmap,
- .vunmap = drm_gem_dmabuf_vunmap,
+ .vmap = amdgpu_dma_buf_vmap,
+ .vunmap = amdgpu_dma_buf_vunmap,
};
/**
@@ -405,11 +374,23 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct dma_buf *buf;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ /* We opt to avoid OOM on system pages allocations */
+ .gfp_retry_mayfail = true,
+ .allow_res_evict = false,
+ };
+ int ret;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
+ ret = ttm_bo_setup_export(&bo->tbo, &ctx);
+ if (ret)
+ return ERR_PTR(ret);
+
buf = drm_gem_prime_export(gobj, flags);
if (!IS_ERR(buf))
buf->ops = &amdgpu_dmabuf_ops;
@@ -434,30 +415,31 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
{
struct dma_resv *resv = dma_buf->resv;
struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_bo *bo;
- struct amdgpu_bo_param bp;
struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
+ uint64_t flags = 0;
int ret;
- memset(&bp, 0, sizeof(bp));
- bp.size = dma_buf->size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_CPU;
- bp.flags = 0;
- bp.type = ttm_bo_type_sg;
- bp.resv = resv;
dma_resv_lock(resv, NULL);
+
+ if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+ struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);
+
+ flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED);
+ }
+
ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_CPU,
- 0, ttm_bo_type_sg, resv, &gobj);
+ AMDGPU_GEM_DOMAIN_CPU, flags,
+ ttm_bo_type_sg, resv, &gobj, 0);
if (ret)
goto error;
bo = gem_to_amdgpu_bo(gobj);
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (dma_buf->ops != &amdgpu_dmabuf_ops)
- bo->prime_shared_count = 1;
dma_resv_unlock(resv);
return gobj;
@@ -487,7 +469,11 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
struct amdgpu_vm_bo_base *bo_base;
int r;
- if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+ /* FIXME: This should be after the "if", but needs a fix to make sure
+ * DMABuf imports are initialized in the right VM list.
+ */
+ amdgpu_vm_bo_invalidate(bo, false);
+ if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
return;
r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
@@ -498,7 +484,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
+ struct dma_resv *resv = vm->root.bo->tbo.base.resv;
if (ticket) {
/* When we get an error here it means that somebody
@@ -518,9 +504,12 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
continue;
}
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ /* Reserve fences for two SDMA page table updates */
+ r = dma_resv_reserve_fences(resv, 2);
+ if (!r)
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (!r)
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_handle_moved(adev, vm, ticket);
if (r && r != -EBUSY)
DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
@@ -594,7 +583,10 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
struct drm_gem_object *obj = &bo->tbo.base;
struct drm_gem_object *gobj;
- if (obj->import_attach) {
+ if (!adev)
+ return false;
+
+ if (drm_gem_is_imported(obj)) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
if (dma_buf->ops != &amdgpu_dmabuf_ops)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index 39b5b9616fd8..3e93b9b407a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -31,8 +31,6 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
struct amdgpu_bo *bo);
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
- struct vm_area_struct *vma);
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 89e6ad30396f..2675689ef70f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -21,6 +21,9 @@
*
*/
+#ifndef AMDGPU_DOORBELL_H
+#define AMDGPU_DOORBELL_H
+
/*
* GPU doorbell structures, functions & helpers
*/
@@ -28,8 +31,15 @@ struct amdgpu_doorbell {
/* doorbell mmio */
resource_size_t base;
resource_size_t size;
- u32 __iomem *ptr;
- u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
+
+ /* Number of doorbells reserved for amdgpu kernel driver */
+ u32 num_kernel_doorbells;
+
+ /* Kernel doorbells */
+ struct amdgpu_bo *kernel_doorbells;
+
+ /* For CPU access of doorbells */
+ uint32_t *cpu_addr;
};
/* Reserved doorbells for amdgpu (including multimedia).
@@ -52,8 +62,11 @@ struct amdgpu_doorbell_index {
uint32_t userqueue_end;
uint32_t gfx_ring0;
uint32_t gfx_ring1;
- uint32_t sdma_engine[8];
- uint32_t mes_ring;
+ uint32_t gfx_userqueue_start;
+ uint32_t gfx_userqueue_end;
+ uint32_t sdma_engine[16];
+ uint32_t mes_ring0;
+ uint32_t mes_ring1;
uint32_t ih;
union {
struct {
@@ -73,15 +86,17 @@ struct amdgpu_doorbell_index {
uint32_t vce_ring6_7;
} uvd_vce;
};
+ uint32_t vpe_ring;
uint32_t first_non_cp;
uint32_t last_non_cp;
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
uint32_t sdma_doorbell_range;
+ /* Per xcc doorbell size for KIQ/KCQ */
+ uint32_t xcc_doorbell_range;
};
-typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
-{
+enum AMDGPU_DOORBELL_ASSIGNMENT {
AMDGPU_DOORBELL_KIQ = 0x000,
AMDGPU_DOORBELL_HIQ = 0x001,
AMDGPU_DOORBELL_DIQ = 0x002,
@@ -99,10 +114,10 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
AMDGPU_DOORBELL_IH = 0x1E8,
AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
AMDGPU_DOORBELL_INVALID = 0xFFFF
-} AMDGPU_DOORBELL_ASSIGNMENT;
+};
+
+enum AMDGPU_VEGA20_DOORBELL_ASSIGNMENT {
-typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
-{
/* Compute + GFX: 0~255 */
AMDGPU_VEGA20_DOORBELL_KIQ = 0x000,
AMDGPU_VEGA20_DOORBELL_HIQ = 0x001,
@@ -156,12 +171,20 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
- AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
+ /* kiq/kcq from second XCD. Max 8 XCDs */
+ AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START = 0x190,
+ /* 8 compute rings per GC. Max to 0x1CE */
+ AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START = 0x197,
+
+ /* AID1 SDMA: 0x1D0 ~ 0x1F7 */
+ AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START = 0x1D0,
+
+ AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1F7,
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
-} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
+};
+
+enum AMDGPU_NAVI10_DOORBELL_ASSIGNMENT {
-typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
-{
/* Compute + GFX: 0~255 */
AMDGPU_NAVI10_DOORBELL_KIQ = 0x000,
AMDGPU_NAVI10_DOORBELL_HIQ = 0x001,
@@ -174,11 +197,15 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008,
AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009,
AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A,
- AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00B,
+ AMDGPU_NAVI10_DOORBELL_MES_RING0 = 0x00B,
+ AMDGPU_NAVI10_DOORBELL_MES_RING1 = 0x00C,
+ AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00D,
AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
- AMDGPU_NAVI10_DOORBELL_MES_RING = 0x090,
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START = 0x08D,
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END = 0x0FF,
+
/* SDMA:256~335*/
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,
@@ -200,18 +227,19 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E,
AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F,
+ AMDGPU_NAVI10_DOORBELL64_VPE = 0x190,
+
AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
- AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCNe_f,
+ AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VPE,
- AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F,
+ AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = AMDGPU_NAVI10_DOORBELL64_VPE,
AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF
-} AMDGPU_NAVI10_DOORBELL_ASSIGNMENT;
+};
/*
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
*/
-typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
-{
+enum AMDGPU_DOORBELL64_ASSIGNMENT {
/*
* All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
* a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
@@ -287,15 +315,58 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
AMDGPU_DOORBELL64_INVALID = 0xFFFF
-} AMDGPU_DOORBELL64_ASSIGNMENT;
+};
+
+enum AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
+
+ /* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... */
+
+ /* KIQ/HIQ/DIQ */
+ AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
+ AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001,
+ AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002,
+ /* Compute: 0x08 ~ 0x20 */
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008,
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F,
+ AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020,
+
+ /* SDMA: 0x100 ~ 0x19F */
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
+ /* IH: 0x1A0 ~ 0x1AF */
+ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
+ /* VCN: 0x1B0 ~ 0x1E8 */
+ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
+ AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
+
+ AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
+ AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
+
+ AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8,
+ AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
+};
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
+/*
+ * GPU doorbell aperture helpers function.
+ */
+int amdgpu_doorbell_init(struct amdgpu_device *adev);
+void amdgpu_doorbell_fini(struct amdgpu_device *adev);
+int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
+uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size);
+
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
new file mode 100644
index 000000000000..3040437d99c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+
+/**
+ * amdgpu_mm_rdoorbell - read a doorbell dword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Returns the value in the doorbell aperture at the
+ * requested doorbell index (CIK).
+ */
+u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ return readl(adev->doorbell.cpu_addr + index);
+
+ dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n",
+ index);
+ return 0;
+}
+
+/**
+ * amdgpu_mm_wdoorbell - write a doorbell dword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Writes @v to the doorbell aperture at the
+ * requested doorbell index (CIK).
+ */
+void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ writel(v, adev->doorbell.cpu_addr + index);
+ else
+ dev_err(adev->dev,
+ "writing beyond doorbell aperture: 0x%08x!\n", index);
+}
+
+/**
+ * amdgpu_mm_rdoorbell64 - read a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Returns the value in the doorbell aperture at the
+ * requested doorbell index (VEGA10+).
+ */
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ return atomic64_read((atomic64_t *)(adev->doorbell.cpu_addr + index));
+
+ dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n",
+ index);
+ return 0;
+}
+
+/**
+ * amdgpu_mm_wdoorbell64 - write a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Writes @v to the doorbell aperture at the
+ * requested doorbell index (VEGA10+).
+ */
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ atomic64_set((atomic64_t *)(adev->doorbell.cpu_addr + index), v);
+ else
+ dev_err(adev->dev,
+ "writing beyond doorbell aperture: 0x%08x!\n", index);
+}
+
+/**
+ * amdgpu_doorbell_index_on_bar - Find doorbell's absolute offset in BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @db_bo: doorbell object's bo
+ * @doorbell_index: doorbell relative index in this doorbell object
+ * @db_size: doorbell size is in byte
+ *
+ * returns doorbell's absolute index in BAR
+ */
+uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size)
+{
+ int db_bo_offset;
+
+ db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
+
+ /* doorbell index is 32 bit but doorbell's size can be 32 bit
+ * or 64 bit, so *db_size(in byte)/4 for alignment.
+ */
+ return db_bo_offset / sizeof(u32) + doorbell_index *
+ DIV_ROUND_UP(db_size, 4);
+}
+
+/**
+ * amdgpu_doorbell_create_kernel_doorbells - Create kernel doorbells for graphics
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Creates doorbells for graphics driver usages.
+ * returns 0 on success, error otherwise.
+ */
+int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev)
+{
+ int r;
+ int size;
+
+ /* SI HW does not have doorbells, skip allocation */
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return 0;
+
+ /* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */
+ size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE);
+
+ /* Allocate an extra page for MES kernel usages (ring test) */
+ adev->mes.db_start_dw_offset = size / sizeof(u32);
+ size += PAGE_SIZE;
+
+ r = amdgpu_bo_create_kernel(adev,
+ size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &adev->doorbell.kernel_doorbells,
+ NULL,
+ (void **)&adev->doorbell.cpu_addr);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to allocate kernel doorbells, err=%d\n", r);
+ return r;
+ }
+
+ adev->doorbell.num_kernel_doorbells = size / sizeof(u32);
+ return 0;
+}
+
+/*
+ * GPU doorbell aperture helpers function.
+ */
+/**
+ * amdgpu_doorbell_init - Init doorbell driver information.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Init doorbell driver information (CIK)
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_doorbell_init(struct amdgpu_device *adev)
+{
+
+ /* No doorbell on SI hardware generation */
+ if (adev->asic_type < CHIP_BONAIRE) {
+ adev->doorbell.base = 0;
+ adev->doorbell.size = 0;
+ adev->doorbell.num_kernel_doorbells = 0;
+ return 0;
+ }
+
+ if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
+ return -EINVAL;
+
+ amdgpu_asic_init_doorbell_index(adev);
+
+ /* doorbell bar mapping */
+ adev->doorbell.base = pci_resource_start(adev->pdev, 2);
+ adev->doorbell.size = pci_resource_len(adev->pdev, 2);
+
+ adev->doorbell.num_kernel_doorbells =
+ min_t(u32, adev->doorbell.size / sizeof(u32),
+ adev->doorbell_index.max_assignment + 1);
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return -EINVAL;
+
+ /*
+ * For Vega, reserve and map two pages on doorbell BAR since SDMA
+ * paging queue doorbell use the second page. The
+ * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
+ * doorbells are in the first page. So with paging queue enabled,
+ * the max num_kernel_doorbells should + 1 page (0x400 in dword)
+ */
+ if (adev->asic_type >= CHIP_VEGA10)
+ adev->doorbell.num_kernel_doorbells += 0x400;
+
+ return 0;
+}
+
+/**
+ * amdgpu_doorbell_fini - Tear down doorbell driver information.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down doorbell driver information (CIK)
+ */
+void amdgpu_doorbell_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->doorbell.kernel_doorbells,
+ NULL,
+ (void **)&adev->doorbell.cpu_addr);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4575192d9b08..2dfbddcef9ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,28 +23,37 @@
*/
#include <drm/amdgpu_drm.h>
+#include <drm/clients/drm_client_setup.h>
#include <drm/drm_drv.h>
+#include <drm/drm_fbdev_ttm.h>
#include <drm/drm_gem.h>
-#include <drm/drm_vblank.h>
#include <drm/drm_managed.h>
-#include "amdgpu_drv.h"
-
#include <drm/drm_pciids.h>
-#include <linux/console.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+
+#include <linux/cc_platform.h>
+#include <linux/dynamic_debug.h>
#include <linux/module.h>
+#include <linux/mmu_notifier.h>
#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
#include <linux/vga_switcheroo.h>
-#include <drm/drm_probe_helper.h>
-#include <linux/mmu_notifier.h>
#include "amdgpu.h"
-#include "amdgpu_irq.h"
-#include "amdgpu_dma_buf.h"
-#include "amdgpu_sched.h"
-
#include "amdgpu_amdkfd.h"
-
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_drv.h"
+#include "amdgpu_fdinfo.h"
+#include "amdgpu_irq.h"
+#include "amdgpu_psp.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_sched.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_userq_fence.h"
+#include "../amdxcp/amdgpu_xcp_drv.h"
/*
* KMS wrapper.
@@ -76,7 +85,7 @@
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
- * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
* - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
* - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
* - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
@@ -90,18 +99,60 @@
* - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
* - 3.39.0 - DMABUF implicit sync does a full pipeline sync
* - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
+ * - 3.41.0 - Add video codec query
+ * - 3.42.0 - Add 16bpc fixed point display support
+ * - 3.43.0 - Add device hot plug/unplug support
+ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
+ * - 3.45.0 - Add context ioctl stable pstate interface
+ * - 3.46.0 - To enable hot plug amdgpu tests in libdrm
+ * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
+ * - 3.48.0 - Add IP discovery version info to HW INFO
+ * - 3.49.0 - Add gang submit into CS IOCTL
+ * - 3.50.0 - Update AMDGPU_INFO_DEV_INFO IOCTL for minimum engine and memory clock
+ * Update AMDGPU_INFO_SENSOR IOCTL for PEAK_PSTATE engine and memory clock
+ * 3.51.0 - Return the PCIe gen and lanes from the INFO ioctl
+ * 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields:
+ * tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size,
+ * gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
+ * 3.53.0 - Support for GFX11 CP GFX shadowing
+ * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
+ * - 3.56.0 - Update IB start address and size alignment for decode and encode
+ * - 3.57.0 - Compute tunneling on GFX10+
+ * - 3.58.0 - Add GFX12 DCC support
+ * - 3.59.0 - Cleared VRAM
+ * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement)
+ * - 3.61.0 - Contains fix for RV/PCO compute queues
+ * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT
+ * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size
+ * - 3.64.0 - Userq IP support query
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 40
+#define KMS_DRIVER_MINOR 64
#define KMS_DRIVER_PATCHLEVEL 0
-int amdgpu_vram_limit;
+/*
+ * amdgpu.debug module options. Are all disabled by default
+ */
+enum AMDGPU_DEBUG_MASK {
+ AMDGPU_DEBUG_VM = BIT(0),
+ AMDGPU_DEBUG_LARGEBAR = BIT(1),
+ AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+ AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
+ AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
+ AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
+ AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
+ AMDGPU_DEBUG_SMU_POOL = BIT(7),
+ AMDGPU_DEBUG_VM_USERPTR = BIT(8),
+ AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
+ AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
+};
+
+unsigned int amdgpu_vram_limit = UINT_MAX;
int amdgpu_vis_vram_limit;
int amdgpu_gart_size = -1; /* auto */
int amdgpu_gtt_size = -1; /* auto */
int amdgpu_moverate = -1; /* auto */
-int amdgpu_benchmarking;
-int amdgpu_testing;
int amdgpu_audio = -1;
int amdgpu_disp_priority;
int amdgpu_hw_i2c;
@@ -119,7 +170,6 @@ int amdgpu_vm_size = -1;
int amdgpu_vm_fragment_size = -1;
int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop;
-int amdgpu_vm_debug;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support;
int amdgpu_dc = -1;
@@ -127,11 +177,22 @@ int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
uint amdgpu_pcie_gen_cap;
uint amdgpu_pcie_lane_cap;
-uint amdgpu_cg_mask = 0xffffffff;
+u64 amdgpu_cg_mask = 0xffffffffffffffff;
uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
-char *amdgpu_disable_cu = NULL;
-char *amdgpu_virtual_display = NULL;
+char *amdgpu_disable_cu;
+char *amdgpu_virtual_display;
+int amdgpu_enforce_isolation = -1;
+int amdgpu_modeset = -1;
+
+/* Specifies the default granularity for SVM, used in buffer
+ * migration and restoration of backing memory when handling
+ * recoverable page faults.
+ *
+ * The value is given as log(numPages(buffer)); for a 2 MiB
+ * buffer it computes to be 9
+ */
+uint amdgpu_svm_default_granularity = 9;
/*
* OverDrive(bit 14) disabled by default
@@ -139,12 +200,12 @@ char *amdgpu_virtual_display = NULL;
*/
uint amdgpu_pp_feature_mask = 0xfff7bfff;
uint amdgpu_force_long_training;
-int amdgpu_job_hang_limit;
int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
int amdgpu_emu_mode;
uint amdgpu_smu_memory_pool_size;
+int amdgpu_smu_pptable_id = -1;
/*
* FBC (bit 0) disabled by default
* MULTI_MON_PP_MCLK_SWITCH (bit 1) enabled by default
@@ -153,18 +214,50 @@ uint amdgpu_smu_memory_pool_size;
* highest. That helps saving some idle power.
* DISABLE_FRACTIONAL_PWM (bit 2) disabled by default
* PSR (bit 3) disabled by default
+ * EDP NO POWER SEQUENCING (bit 4) disabled by default
*/
uint amdgpu_dc_feature_mask = 2;
uint amdgpu_dc_debug_mask;
+uint amdgpu_dc_visual_confirm;
int amdgpu_async_gfx_ring = 1;
-int amdgpu_mcbp;
+int amdgpu_mcbp = -1;
int amdgpu_discovery = -1;
int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
+int amdgpu_mes_kiq;
+int amdgpu_uni_mes = 1;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
-int amdgpu_tmz;
+int amdgpu_tmz = -1; /* auto */
+uint amdgpu_freesync_vid_mode;
int amdgpu_reset_method = -1; /* auto */
int amdgpu_num_kcq = -1;
+int amdgpu_smartshift_bias;
+int amdgpu_use_xgmi_p2p = 1;
+int amdgpu_vcnfw_log;
+int amdgpu_sg_display = -1; /* auto */
+int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+int amdgpu_umsch_mm;
+int amdgpu_seamless = -1; /* auto */
+uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
+int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
+int amdgpu_umsch_mm_fwlog;
+int amdgpu_rebar = -1; /* auto */
+int amdgpu_user_queue = -1;
+
+DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
+ "DRM_UT_CORE",
+ "DRM_UT_DRIVER",
+ "DRM_UT_KMS",
+ "DRM_UT_PRIME",
+ "DRM_UT_ATOMIC",
+ "DRM_UT_VBL",
+ "DRM_UT_STATE",
+ "DRM_UT_LEASE",
+ "DRM_UT_DP",
+ "DRM_UT_DRMRES");
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -172,6 +265,10 @@ struct amdgpu_mgpu_info mgpu_info = {
int amdgpu_ras_enable = -1;
uint amdgpu_ras_mask = 0xffffffff;
int amdgpu_bad_page_threshold = -1;
+struct amdgpu_watchdog_timer amdgpu_watchdog_timer = {
+ .timeout_fatal_disable = false,
+ .period = 0x0, /* default to 0x0 (timeout disable) */
+};
/**
* DOC: vramlimit (int)
@@ -189,17 +286,19 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
/**
* DOC: gartsize (uint)
- * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic).
+ * Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing.
+ * The default is -1 (The size depends on asic).
*/
-MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
+MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
/**
* DOC: gttsize (int)
- * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM,
- * otherwise 3/4 RAM size).
+ * Restrict the size of GTT domain (for userspace use) in MiB for testing.
+ * The default is -1 (Use value specified by TTM).
+ * This parameter is deprecated and will be removed in the future.
*/
-MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
+MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
/**
@@ -210,24 +309,10 @@ MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc.
module_param_named(moverate, amdgpu_moverate, int, 0600);
/**
- * DOC: benchmark (int)
- * Run benchmarks. The default is 0 (Skip benchmarks).
- */
-MODULE_PARM_DESC(benchmark, "Run benchmark");
-module_param_named(benchmark, amdgpu_benchmarking, int, 0444);
-
-/**
- * DOC: test (int)
- * Test BO GTT->VRAM and VRAM->GTT GPU copies. The default is 0 (Skip test, only set 1 to run test).
- */
-MODULE_PARM_DESC(test, "Run tests");
-module_param_named(test, amdgpu_testing, int, 0444);
-
-/**
* DOC: audio (int)
* Set HDMI/DPAudio. Only affects non-DC display handling. The default is -1 (Enabled), set 0 to disabled it.
*/
-MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)");
+MODULE_PARM_DESC(audio, "HDMI/DP Audio enable for non DC displays (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(audio, amdgpu_audio, int, 0444);
/**
@@ -259,27 +344,26 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
+ * DOC: svm_default_granularity (uint)
+ * Used in buffer migration and handling of recoverable page faults
+ */
+MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity in log(2^Pages), default 9 = 2^9 = 2 MiB");
+module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644);
+
+/**
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
- * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
- * multiple values specified. 0 and negative values are invalidated. They will be adjusted
- * to the default timeout.
- *
- * - With one value specified, the setting will apply to all non-compute jobs.
- * - With multiple values specified, the first one will be for GFX.
- * The second one is for Compute. The third and fourth ones are
- * for SDMA and Video.
+ * The format can be [single value] for setting all timeouts at once or
+ * [GFX,Compute,SDMA,Video] to set individual timeouts.
+ * Negative values mean infinity.
*
- * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
- * jobs is 10000. And there is no timeout enforced on compute jobs.
+ * By default(with no lockup_timeout settings), the timeout for all queues is 2000.
*/
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
- "for passthrough or sriov, 10000 for all jobs."
- " 0: keep default value. negative: infinity timeout), "
- "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
- "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
-module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
+MODULE_PARM_DESC(lockup_timeout,
+ "GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video].");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout,
+ sizeof(amdgpu_lockup_timeout), 0444);
/**
* DOC: dpm (int)
@@ -292,9 +376,12 @@ module_param_named(dpm, amdgpu_dpm, int, 0444);
/**
* DOC: fw_load_type (int)
- * Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto).
+ * Set different firmware loading type for debugging, if supported.
+ * Set to 0 to force direct loading if supported by the ASIC. Set
+ * to -1 to select the default loading mode for the ASIC, as defined
+ * by the driver. The default is -1 (auto).
*/
-MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)");
+MODULE_PARM_DESC(fw_load_type, "firmware loading type (3 = rlc backdoor autoload if supported, 2 = smu load if supported, 1 = psp load, 0 = force direct if supported, -1 = auto)");
module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
/**
@@ -306,10 +393,12 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
/**
* DOC: runpm (int)
- * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down
- * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.
+ * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down
+ * the dGPUs when they are idle if supported. The default is -1 (auto enable).
+ * Setting the value to 0 disables this functionality.
+ * Setting the value to -2 is auto enabled with power down when displays are attached.
*/
-MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)");
+MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
/**
@@ -320,7 +409,7 @@ module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
* the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
*/
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
-module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
+module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
/**
* DOC: bapm (int)
@@ -366,13 +455,6 @@ MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = prin
module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
/**
- * DOC: vm_debug (int)
- * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
- */
-MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
-module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
-
-/**
* DOC: vm_update_mode (int)
* Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default
* is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never).
@@ -385,7 +467,7 @@ module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
* Enable experimental hw support (1 = enable). The default is 0 (disabled).
*/
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
-module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
/**
* DOC: dc (int)
@@ -441,12 +523,12 @@ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
/**
- * DOC: cg_mask (uint)
+ * DOC: cg_mask (ullong)
* Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in
- * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled).
*/
MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
-module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444);
/**
* DOC: pg_mask (uint)
@@ -482,13 +564,6 @@ MODULE_PARM_DESC(virtual_display,
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
/**
- * DOC: job_hang_limit (int)
- * Set how much time allow a job hang and not drop it. The default is 0.
- */
-MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");
-module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
-
-/**
* DOC: lbpw (int)
* Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).
*/
@@ -503,14 +578,14 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
* Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
*/
MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
-module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
/**
* DOC: emu_mode (int)
* Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
*/
MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
-module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444);
/**
* DOC: ras_enable (int)
@@ -528,40 +603,54 @@ MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff),
module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444);
/**
- * DOC: si_support (int)
- * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled,
- * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
- * otherwise using amdgpu driver.
+ * DOC: timeout_fatal_disable (bool)
+ * Disable Watchdog timeout fatal error event
*/
-#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_PARM_DESC(timeout_fatal_disable, "disable watchdog timeout fatal error (false = default)");
+module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_disable, bool, 0644);
-#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
-int amdgpu_si_support = 0;
-MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
-#else
-int amdgpu_si_support = 1;
-MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)");
-#endif
+/**
+ * DOC: timeout_period (uint)
+ * Modify the watchdog timeout max_cycles as (1 << period)
+ */
+MODULE_PARM_DESC(timeout_period, "watchdog timeout period (0 = timeout disabled, 1 ~ 0x23 = timeout maxcycles = (1 << period)");
+module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
+/**
+ * DOC: si_support (int)
+ * 1 = enabled, 0 = disabled, -1 = default
+ *
+ * SI (Southern Islands) are first generation GCN GPUs, supported by both
+ * drivers: radeon (old) and amdgpu (new). This parameter controls whether
+ * amdgpu should support SI.
+ * By default, SI dedicated GPUs are supported by amdgpu.
+ * Only relevant when CONFIG_DRM_AMDGPU_SI is enabled to build SI support in amdgpu.
+ * See also radeon.si_support which should be disabled when amdgpu.si_support is
+ * enabled, and vice versa.
+ */
+int amdgpu_si_support = -1;
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled, -1 = default)");
module_param_named(si_support, amdgpu_si_support, int, 0444);
#endif
/**
* DOC: cik_support (int)
- * Set CIK support driver. This parameter works after set config CONFIG_DRM_AMDGPU_CIK. For CIK asic, when radeon driver is enabled,
- * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
- * otherwise using amdgpu driver.
- */
+ * 1 = enabled, 0 = disabled, -1 = default
+ *
+ * CIK (Sea Islands) are second generation GCN GPUs, supported by both
+ * drivers: radeon (old) and amdgpu (new). This parameter controls whether
+ * amdgpu should support CIK.
+ * By default:
+ * - CIK dedicated GPUs are supported by amdgpu.
+ * - CIK APUs are supported by radeon (except when radeon is not built).
+ * Only relevant when CONFIG_DRM_AMDGPU_CIK is enabled to build CIK support in amdgpu.
+ * See also radeon.cik_support which should be disabled when amdgpu.cik_support is
+ * enabled, and vice versa.
+ */
+int amdgpu_cik_support = -1;
#ifdef CONFIG_DRM_AMDGPU_CIK
-
-#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
-int amdgpu_cik_support = 0;
-MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
-#else
-int amdgpu_cik_support = 1;
-MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)");
-#endif
-
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled, -1 = default)");
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
@@ -571,8 +660,7 @@ module_param_named(cik_support, amdgpu_cik_support, int, 0444);
* E.g. 0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte. The default is 0 (disabled).
*/
MODULE_PARM_DESC(smu_memory_pool_size,
- "reserve gtt for smu debug usage, 0 = disable,"
- "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
+ "reserve gtt for smu debug usage, 0 = disable,0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
/**
@@ -585,16 +673,16 @@ module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444);
/**
* DOC: mcbp (int)
- * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled)
+ * It is used to enable mid command buffer preemption. (0 = disabled, 1 = enabled, -1 auto (default))
*/
MODULE_PARM_DESC(mcbp,
- "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)");
+ "Enable Mid-command buffer preemption (0 = disabled, 1 = enabled), -1 = auto (default)");
module_param_named(mcbp, amdgpu_mcbp, int, 0444);
/**
* DOC: discovery (int)
* Allow driver to discover hardware IP information from IP Discovery table at the top of VRAM.
- * (-1 = auto (default), 0 = disabled, 1 = enabled)
+ * (-1 = auto (default), 0 = disabled, 1 = enabled, 2 = use ip_discovery table from file)
*/
MODULE_PARM_DESC(discovery,
"Allow driver to discover hardware IPs from IP Discovery table at the top of VRAM");
@@ -610,8 +698,36 @@ MODULE_PARM_DESC(mes,
module_param_named(mes, amdgpu_mes, int, 0444);
/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+ "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
+/**
+ * DOC: mes_kiq (int)
+ * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_kiq,
+ "Enable Micro Engine Scheduler KIQ (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444);
+
+/**
+ * DOC: uni_mes (int)
+ * Enable Unified Micro Engine Scheduler. This is a new engine pipe for unified scheduler.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(uni_mes,
+ "Enable Unified Micro Engine Scheduler (0 = disabled, 1 = enabled(default)");
+module_param_named(uni_mes, amdgpu_uni_mes, int, 0444);
+
+/**
* DOC: noretry (int)
- * Disable retry faults in the GPU memory controller.
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
* (0 = retry enabled, 1 = retry disabled, -1 auto (default))
*/
MODULE_PARM_DESC(noretry,
@@ -624,8 +740,15 @@ module_param_named(noretry, amdgpu_noretry, int, 0644);
*/
MODULE_PARM_DESC(force_asic_type,
"A non negative value used to specify the asic type for all supported GPUs");
-module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444);
+/**
+ * DOC: use_xgmi_p2p (int)
+ * Enables/disables XGMI P2P interface (0 = disable, 1 = enable).
+ */
+MODULE_PARM_DESC(use_xgmi_p2p,
+ "Enable XGMI P2P interface (0 = disable; 1 = enable (default))");
+module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444);
#ifdef CONFIG_HSA_AMD
@@ -636,7 +759,7 @@ module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
* assigns queues to HQDs.
*/
int sched_policy = KFD_SCHED_POLICY_HWS;
-module_param(sched_policy, int, 0444);
+module_param_unsafe(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
@@ -645,7 +768,7 @@ MODULE_PARM_DESC(sched_policy,
* Maximum number of processes that HWS can schedule concurrently. The maximum is the
* number of VMIDs assigned to the HWS, which is also the default.
*/
-int hws_max_conc_proc = 8;
+int hws_max_conc_proc = -1;
module_param(hws_max_conc_proc, int, 0444);
MODULE_PARM_DESC(hws_max_conc_proc,
"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
@@ -681,38 +804,12 @@ MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
/**
- * DOC: debug_largebar (int)
- * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
- * system. This limits the VRAM size reported to ROCm applications to the visible
- * size, usually 256MB.
- * Default value is 0, diabled.
- */
-int debug_largebar;
-module_param(debug_largebar, int, 0444);
-MODULE_PARM_DESC(debug_largebar,
- "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
-
-/**
- * DOC: ignore_crat (int)
- * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
- * table to get information about AMD APUs. This option can serve as a workaround on
- * systems with a broken CRAT table.
- *
- * Default is auto (according to asic type, iommu_v2, and crat table, to decide
- * whehter use CRAT)
- */
-int ignore_crat;
-module_param(ignore_crat, int, 0444);
-MODULE_PARM_DESC(ignore_crat,
- "Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
-
-/**
* DOC: halt_if_hws_hang (int)
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
* Setting 1 enables halt on hang.
*/
int halt_if_hws_hang;
-module_param(halt_if_hws_hang, int, 0644);
+module_param_unsafe(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
/**
@@ -721,13 +818,13 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
* check says. Default value: false (rely on MEC2 firmware version check).
*/
bool hws_gws_support;
-module_param(hws_gws_support, bool, 0444);
+module_param_unsafe(hws_gws_support, bool, 0444);
MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
/**
- * DOC: queue_preemption_timeout_ms (int)
- * queue preemption timeout in ms (1 = Minimum, 9000 = default)
- */
+ * DOC: queue_preemption_timeout_ms (int)
+ * queue preemption timeout in ms (1 = Minimum, 9000 = default)
+ */
int queue_preemption_timeout_ms = 9000;
module_param(queue_preemption_timeout_ms, int, 0644);
MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
@@ -748,6 +845,30 @@ bool no_system_mem_limit;
module_param(no_system_mem_limit, bool, 0644);
MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+/**
+ * DOC: no_queue_eviction_on_vm_fault (int)
+ * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction).
+ */
+int amdgpu_no_queue_eviction_on_vm_fault;
+MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
+module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
+#endif
+
+/**
+ * DOC: mtype_local (int)
+ */
+int amdgpu_mtype_local;
+MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
+module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444);
+
+/**
+ * DOC: pcie_p2p (bool)
+ * Enable PCIe P2P (requires large-BAR). Default value: true (on)
+ */
+#ifdef CONFIG_HSA_AMD_P2P
+bool pcie_p2p = true;
+module_param(pcie_p2p, bool, 0444);
+MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))");
#endif
/**
@@ -760,11 +881,14 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
/**
* DOC: dcdebugmask (uint)
- * Override display features enabled. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * Display debug options. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
*/
MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))");
module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
+MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)");
+module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444);
+
/**
* DOC: abmlevel (uint)
* Override the default ABM (Adaptive Backlight Management) level used for DC
@@ -774,12 +898,29 @@ module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
* the ABM algorithm, with 1 being the least reduction and 4 being the most
* reduction.
*
- * Defaults to 0, or disabled. Userspace can still override this level later
- * after boot.
+ * Defaults to -1, or auto. Userspace can only override this level after
+ * boot if it's set to auto.
+ */
+int amdgpu_dm_abm_level = -1;
+MODULE_PARM_DESC(abmlevel,
+ "ABM level (0 = off, 1-4 = backlight reduction level, -1 auto (default))");
+module_param_named(abmlevel, amdgpu_dm_abm_level, int, 0444);
+
+int amdgpu_backlight = -1;
+MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))");
+module_param_named(backlight, amdgpu_backlight, bint, 0444);
+
+/**
+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1 (where it is enabled unless a PSR-SU display is detected).
*/
-uint amdgpu_dm_abm_level;
-MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
-module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
+MODULE_PARM_DESC(damageclips,
+ "Damage clips support (0 = disable, 1 = enable, -1 auto (default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
/**
* DOC: tmz (int)
@@ -788,31 +929,912 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
*
* The default value: 0 (off). TODO: change to auto till it is completed.
*/
-MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
+MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)");
module_param_named(tmz, amdgpu_tmz, int, 0444);
/**
+ * DOC: freesync_video (uint)
+ * Enable the optimization to adjust front porch timing to achieve seamless
+ * mode change experience when setting a freesync supported mode for which full
+ * modeset is not needed.
+ *
+ * The Display Core will add a set of modes derived from the base FreeSync
+ * video mode into the corresponding connector's mode list based on commonly
+ * used refresh rates and VRR range of the connected display, when users enable
+ * this feature. From the userspace perspective, they can see a seamless mode
+ * change experience when the change between different refresh rates under the
+ * same resolution. Additionally, userspace applications such as Video playback
+ * can read this modeset list and change the refresh rate based on the video
+ * frame rate. Finally, the userspace can also derive an appropriate mode for a
+ * particular refresh rate based on the FreeSync Mode and add it to the
+ * connector's mode list.
+ *
+ * Note: This is an experimental feature.
+ *
+ * The default value: 0 (off).
+ */
+MODULE_PARM_DESC(
+ freesync_video,
+ "Adds additional modes via VRR for refresh changes without a full modeset (0 = off (default), 1 = on)");
+module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
+
+/**
* DOC: reset_method (int)
- * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco, 5 = pci)
+ * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
*/
-MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco, 5 = pci)");
-module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
+module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644);
/**
- * DOC: bad_page_threshold (int)
- * Bad page threshold is to specify the threshold value of faulty pages
- * detected by RAS ECC, that may result in GPU entering bad status if total
- * faulty pages by ECC exceed threshold value and leave it for user's further
- * check.
+ * DOC: bad_page_threshold (int) Bad page threshold is specifies the
+ * threshold value of faulty pages detected by RAS ECC, which may
+ * result in the GPU entering bad status when the number of total
+ * faulty pages by ECC exceeds the threshold value.
*/
-MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)");
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records, user-defined threshold)");
module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+/**
+ * DOC: vcnfw_log (int)
+ * Enable vcnfw log output for debugging, the default is disabled.
+ */
+MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log(0 = disable (default value), 1 = enable)");
+module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444);
+
+/**
+ * DOC: sg_display (int)
+ * Disable S/G (scatter/gather) display (i.e., display from system memory).
+ * This option is only relevant on APUs. Set this option to 0 to disable
+ * S/G display if you experience flickering or other issues under memory
+ * pressure and report the issue.
+ */
+MODULE_PARM_DESC(sg_display, "S/G Display (-1 = auto (default), 0 = disable)");
+module_param_named(sg_display, amdgpu_sg_display, int, 0444);
+
+/**
+ * DOC: umsch_mm (int)
+ * Enable Multi Media User Mode Scheduler. This is a HW scheduling engine for VCN and VPE.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(umsch_mm,
+ "Enable Multi Media User Mode Scheduler (0 = disabled (default), 1 = enabled)");
+module_param_named(umsch_mm, amdgpu_umsch_mm, int, 0444);
+
+/**
+ * DOC: umsch_mm_fwlog (int)
+ * Enable umschfw log output for debugging, the default is disabled.
+ */
+MODULE_PARM_DESC(umsch_mm_fwlog, "Enable umschfw log(0 = disable (default value), 1 = enable)");
+module_param_named(umsch_mm_fwlog, amdgpu_umsch_mm_fwlog, int, 0444);
+
+/**
+ * DOC: smu_pptable_id (int)
+ * Used to override pptable id. id = 0 use VBIOS pptable.
+ * id > 0 use the soft pptable with specicfied id.
+ */
+MODULE_PARM_DESC(smu_pptable_id,
+ "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
+module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
+
+/**
+ * DOC: partition_mode (int)
+ * Used to override the default SPX mode.
+ */
+MODULE_PARM_DESC(
+ user_partt_mode,
+ "specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \
+ 0 = AMDGPU_SPX_PARTITION_MODE, \
+ 1 = AMDGPU_DPX_PARTITION_MODE, \
+ 2 = AMDGPU_TPX_PARTITION_MODE, \
+ 3 = AMDGPU_QPX_PARTITION_MODE, \
+ 4 = AMDGPU_CPX_PARTITION_MODE)");
+module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
+
+
+/**
+ * DOC: enforce_isolation (int)
+ * enforce process isolation between graphics and compute.
+ * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)
+ */
+module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444);
+MODULE_PARM_DESC(enforce_isolation,
+"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)");
+
+/**
+ * DOC: modeset (int)
+ * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto).
+ */
+MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)");
+module_param_named(modeset, amdgpu_modeset, int, 0444);
+
+/**
+ * DOC: seamless (int)
+ * Seamless boot will keep the image on the screen during the boot process.
+ */
+MODULE_PARM_DESC(seamless, "Seamless boot (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(seamless, amdgpu_seamless, int, 0444);
+
+/**
+ * DOC: debug_mask (uint)
+ * Debug options for amdgpu, work as a binary mask with the following options:
+ *
+ * - 0x1: Debug VM handling
+ * - 0x2: Enable simulating large-bar capability on non-large bar system. This
+ * limits the VRAM size reported to ROCm applications to the visible
+ * size, usually 256MB.
+ * - 0x4: Disable GPU soft recovery, always do a full reset
+ * - 0x8: Use VRAM for firmware loading
+ * - 0x10: Enable ACA based RAS logging
+ * - 0x20: Enable experimental resets
+ * - 0x40: Disable ring resets
+ * - 0x80: Use VRAM for SMU pool
+ */
+MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
+module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
+
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture. This provides an aperture in the GPU's internal
+ * address space for direct access to system memory. Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints there may be likely interference of
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial setting or
+ * P-state transition. However, there may be potential performance impact with this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported))
+ */
+MODULE_PARM_DESC(wbrf,
+ "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
+/**
+ * DOC: rebar (int)
+ * Allow BAR resizing. Disable this to prevent the driver from attempting
+ * to resize the BAR if the GPU supports it and there is available MMIO space.
+ * Note that this just prevents the driver from resizing the BAR. The BIOS
+ * may have already resized the BAR at boot time.
+ */
+MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(rebar, amdgpu_rebar, int, 0444);
+
+/**
+ * DOC: user_queue (int)
+ * Enable user queues on systems that support user queues. Possible values:
+ *
+ * - -1 = auto (ASIC specific default)
+ * - 0 = user queues disabled
+ * - 1 = user queues enabled and kernel queues enabled (if supported)
+ * - 2 = user queues enabled and kernel queues disabled
+ */
+MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)");
+module_param_named(user_queue, amdgpu_user_queue, int, 0444);
+
+/* These devices are not supported by amdgpu.
+ * They are supported by the mach64, r128, radeon drivers
+ */
+static const u16 amdgpu_unsupported_pciidlist[] = {
+ /* mach64 */
+ 0x4354,
+ 0x4358,
+ 0x4554,
+ 0x4742,
+ 0x4744,
+ 0x4749,
+ 0x474C,
+ 0x474D,
+ 0x474E,
+ 0x474F,
+ 0x4750,
+ 0x4751,
+ 0x4752,
+ 0x4753,
+ 0x4754,
+ 0x4755,
+ 0x4756,
+ 0x4757,
+ 0x4758,
+ 0x4759,
+ 0x475A,
+ 0x4C42,
+ 0x4C44,
+ 0x4C47,
+ 0x4C49,
+ 0x4C4D,
+ 0x4C4E,
+ 0x4C50,
+ 0x4C51,
+ 0x4C52,
+ 0x4C53,
+ 0x5654,
+ 0x5655,
+ 0x5656,
+ /* r128 */
+ 0x4c45,
+ 0x4c46,
+ 0x4d46,
+ 0x4d4c,
+ 0x5041,
+ 0x5042,
+ 0x5043,
+ 0x5044,
+ 0x5045,
+ 0x5046,
+ 0x5047,
+ 0x5048,
+ 0x5049,
+ 0x504A,
+ 0x504B,
+ 0x504C,
+ 0x504D,
+ 0x504E,
+ 0x504F,
+ 0x5050,
+ 0x5051,
+ 0x5052,
+ 0x5053,
+ 0x5054,
+ 0x5055,
+ 0x5056,
+ 0x5057,
+ 0x5058,
+ 0x5245,
+ 0x5246,
+ 0x5247,
+ 0x524b,
+ 0x524c,
+ 0x534d,
+ 0x5446,
+ 0x544C,
+ 0x5452,
+ /* radeon */
+ 0x3150,
+ 0x3151,
+ 0x3152,
+ 0x3154,
+ 0x3155,
+ 0x3E50,
+ 0x3E54,
+ 0x4136,
+ 0x4137,
+ 0x4144,
+ 0x4145,
+ 0x4146,
+ 0x4147,
+ 0x4148,
+ 0x4149,
+ 0x414A,
+ 0x414B,
+ 0x4150,
+ 0x4151,
+ 0x4152,
+ 0x4153,
+ 0x4154,
+ 0x4155,
+ 0x4156,
+ 0x4237,
+ 0x4242,
+ 0x4336,
+ 0x4337,
+ 0x4437,
+ 0x4966,
+ 0x4967,
+ 0x4A48,
+ 0x4A49,
+ 0x4A4A,
+ 0x4A4B,
+ 0x4A4C,
+ 0x4A4D,
+ 0x4A4E,
+ 0x4A4F,
+ 0x4A50,
+ 0x4A54,
+ 0x4B48,
+ 0x4B49,
+ 0x4B4A,
+ 0x4B4B,
+ 0x4B4C,
+ 0x4C57,
+ 0x4C58,
+ 0x4C59,
+ 0x4C5A,
+ 0x4C64,
+ 0x4C66,
+ 0x4C67,
+ 0x4E44,
+ 0x4E45,
+ 0x4E46,
+ 0x4E47,
+ 0x4E48,
+ 0x4E49,
+ 0x4E4A,
+ 0x4E4B,
+ 0x4E50,
+ 0x4E51,
+ 0x4E52,
+ 0x4E53,
+ 0x4E54,
+ 0x4E56,
+ 0x5144,
+ 0x5145,
+ 0x5146,
+ 0x5147,
+ 0x5148,
+ 0x514C,
+ 0x514D,
+ 0x5157,
+ 0x5158,
+ 0x5159,
+ 0x515A,
+ 0x515E,
+ 0x5460,
+ 0x5462,
+ 0x5464,
+ 0x5548,
+ 0x5549,
+ 0x554A,
+ 0x554B,
+ 0x554C,
+ 0x554D,
+ 0x554E,
+ 0x554F,
+ 0x5550,
+ 0x5551,
+ 0x5552,
+ 0x5554,
+ 0x564A,
+ 0x564B,
+ 0x564F,
+ 0x5652,
+ 0x5653,
+ 0x5657,
+ 0x5834,
+ 0x5835,
+ 0x5954,
+ 0x5955,
+ 0x5974,
+ 0x5975,
+ 0x5960,
+ 0x5961,
+ 0x5962,
+ 0x5964,
+ 0x5965,
+ 0x5969,
+ 0x5a41,
+ 0x5a42,
+ 0x5a61,
+ 0x5a62,
+ 0x5b60,
+ 0x5b62,
+ 0x5b63,
+ 0x5b64,
+ 0x5b65,
+ 0x5c61,
+ 0x5c63,
+ 0x5d48,
+ 0x5d49,
+ 0x5d4a,
+ 0x5d4c,
+ 0x5d4d,
+ 0x5d4e,
+ 0x5d4f,
+ 0x5d50,
+ 0x5d52,
+ 0x5d57,
+ 0x5e48,
+ 0x5e4a,
+ 0x5e4b,
+ 0x5e4c,
+ 0x5e4d,
+ 0x5e4f,
+ 0x6700,
+ 0x6701,
+ 0x6702,
+ 0x6703,
+ 0x6704,
+ 0x6705,
+ 0x6706,
+ 0x6707,
+ 0x6708,
+ 0x6709,
+ 0x6718,
+ 0x6719,
+ 0x671c,
+ 0x671d,
+ 0x671f,
+ 0x6720,
+ 0x6721,
+ 0x6722,
+ 0x6723,
+ 0x6724,
+ 0x6725,
+ 0x6726,
+ 0x6727,
+ 0x6728,
+ 0x6729,
+ 0x6738,
+ 0x6739,
+ 0x673e,
+ 0x6740,
+ 0x6741,
+ 0x6742,
+ 0x6743,
+ 0x6744,
+ 0x6745,
+ 0x6746,
+ 0x6747,
+ 0x6748,
+ 0x6749,
+ 0x674A,
+ 0x6750,
+ 0x6751,
+ 0x6758,
+ 0x6759,
+ 0x675B,
+ 0x675D,
+ 0x675F,
+ 0x6760,
+ 0x6761,
+ 0x6762,
+ 0x6763,
+ 0x6764,
+ 0x6765,
+ 0x6766,
+ 0x6767,
+ 0x6768,
+ 0x6770,
+ 0x6771,
+ 0x6772,
+ 0x6778,
+ 0x6779,
+ 0x677B,
+ 0x6840,
+ 0x6841,
+ 0x6842,
+ 0x6843,
+ 0x6849,
+ 0x684C,
+ 0x6850,
+ 0x6858,
+ 0x6859,
+ 0x6880,
+ 0x6888,
+ 0x6889,
+ 0x688A,
+ 0x688C,
+ 0x688D,
+ 0x6898,
+ 0x6899,
+ 0x689b,
+ 0x689c,
+ 0x689d,
+ 0x689e,
+ 0x68a0,
+ 0x68a1,
+ 0x68a8,
+ 0x68a9,
+ 0x68b0,
+ 0x68b8,
+ 0x68b9,
+ 0x68ba,
+ 0x68be,
+ 0x68bf,
+ 0x68c0,
+ 0x68c1,
+ 0x68c7,
+ 0x68c8,
+ 0x68c9,
+ 0x68d8,
+ 0x68d9,
+ 0x68da,
+ 0x68de,
+ 0x68e0,
+ 0x68e1,
+ 0x68e4,
+ 0x68e5,
+ 0x68e8,
+ 0x68e9,
+ 0x68f1,
+ 0x68f2,
+ 0x68f8,
+ 0x68f9,
+ 0x68fa,
+ 0x68fe,
+ 0x7100,
+ 0x7101,
+ 0x7102,
+ 0x7103,
+ 0x7104,
+ 0x7105,
+ 0x7106,
+ 0x7108,
+ 0x7109,
+ 0x710A,
+ 0x710B,
+ 0x710C,
+ 0x710E,
+ 0x710F,
+ 0x7140,
+ 0x7141,
+ 0x7142,
+ 0x7143,
+ 0x7144,
+ 0x7145,
+ 0x7146,
+ 0x7147,
+ 0x7149,
+ 0x714A,
+ 0x714B,
+ 0x714C,
+ 0x714D,
+ 0x714E,
+ 0x714F,
+ 0x7151,
+ 0x7152,
+ 0x7153,
+ 0x715E,
+ 0x715F,
+ 0x7180,
+ 0x7181,
+ 0x7183,
+ 0x7186,
+ 0x7187,
+ 0x7188,
+ 0x718A,
+ 0x718B,
+ 0x718C,
+ 0x718D,
+ 0x718F,
+ 0x7193,
+ 0x7196,
+ 0x719B,
+ 0x719F,
+ 0x71C0,
+ 0x71C1,
+ 0x71C2,
+ 0x71C3,
+ 0x71C4,
+ 0x71C5,
+ 0x71C6,
+ 0x71C7,
+ 0x71CD,
+ 0x71CE,
+ 0x71D2,
+ 0x71D4,
+ 0x71D5,
+ 0x71D6,
+ 0x71DA,
+ 0x71DE,
+ 0x7200,
+ 0x7210,
+ 0x7211,
+ 0x7240,
+ 0x7243,
+ 0x7244,
+ 0x7245,
+ 0x7246,
+ 0x7247,
+ 0x7248,
+ 0x7249,
+ 0x724A,
+ 0x724B,
+ 0x724C,
+ 0x724D,
+ 0x724E,
+ 0x724F,
+ 0x7280,
+ 0x7281,
+ 0x7283,
+ 0x7284,
+ 0x7287,
+ 0x7288,
+ 0x7289,
+ 0x728B,
+ 0x728C,
+ 0x7290,
+ 0x7291,
+ 0x7293,
+ 0x7297,
+ 0x7834,
+ 0x7835,
+ 0x791e,
+ 0x791f,
+ 0x793f,
+ 0x7941,
+ 0x7942,
+ 0x796c,
+ 0x796d,
+ 0x796e,
+ 0x796f,
+ 0x9400,
+ 0x9401,
+ 0x9402,
+ 0x9403,
+ 0x9405,
+ 0x940A,
+ 0x940B,
+ 0x940F,
+ 0x94A0,
+ 0x94A1,
+ 0x94A3,
+ 0x94B1,
+ 0x94B3,
+ 0x94B4,
+ 0x94B5,
+ 0x94B9,
+ 0x9440,
+ 0x9441,
+ 0x9442,
+ 0x9443,
+ 0x9444,
+ 0x9446,
+ 0x944A,
+ 0x944B,
+ 0x944C,
+ 0x944E,
+ 0x9450,
+ 0x9452,
+ 0x9456,
+ 0x945A,
+ 0x945B,
+ 0x945E,
+ 0x9460,
+ 0x9462,
+ 0x946A,
+ 0x946B,
+ 0x947A,
+ 0x947B,
+ 0x9480,
+ 0x9487,
+ 0x9488,
+ 0x9489,
+ 0x948A,
+ 0x948F,
+ 0x9490,
+ 0x9491,
+ 0x9495,
+ 0x9498,
+ 0x949C,
+ 0x949E,
+ 0x949F,
+ 0x94C0,
+ 0x94C1,
+ 0x94C3,
+ 0x94C4,
+ 0x94C5,
+ 0x94C6,
+ 0x94C7,
+ 0x94C8,
+ 0x94C9,
+ 0x94CB,
+ 0x94CC,
+ 0x94CD,
+ 0x9500,
+ 0x9501,
+ 0x9504,
+ 0x9505,
+ 0x9506,
+ 0x9507,
+ 0x9508,
+ 0x9509,
+ 0x950F,
+ 0x9511,
+ 0x9515,
+ 0x9517,
+ 0x9519,
+ 0x9540,
+ 0x9541,
+ 0x9542,
+ 0x954E,
+ 0x954F,
+ 0x9552,
+ 0x9553,
+ 0x9555,
+ 0x9557,
+ 0x955f,
+ 0x9580,
+ 0x9581,
+ 0x9583,
+ 0x9586,
+ 0x9587,
+ 0x9588,
+ 0x9589,
+ 0x958A,
+ 0x958B,
+ 0x958C,
+ 0x958D,
+ 0x958E,
+ 0x958F,
+ 0x9590,
+ 0x9591,
+ 0x9593,
+ 0x9595,
+ 0x9596,
+ 0x9597,
+ 0x9598,
+ 0x9599,
+ 0x959B,
+ 0x95C0,
+ 0x95C2,
+ 0x95C4,
+ 0x95C5,
+ 0x95C6,
+ 0x95C7,
+ 0x95C9,
+ 0x95CC,
+ 0x95CD,
+ 0x95CE,
+ 0x95CF,
+ 0x9610,
+ 0x9611,
+ 0x9612,
+ 0x9613,
+ 0x9614,
+ 0x9615,
+ 0x9616,
+ 0x9640,
+ 0x9641,
+ 0x9642,
+ 0x9643,
+ 0x9644,
+ 0x9645,
+ 0x9647,
+ 0x9648,
+ 0x9649,
+ 0x964a,
+ 0x964b,
+ 0x964c,
+ 0x964e,
+ 0x964f,
+ 0x9710,
+ 0x9711,
+ 0x9712,
+ 0x9713,
+ 0x9714,
+ 0x9715,
+ 0x9802,
+ 0x9803,
+ 0x9804,
+ 0x9805,
+ 0x9806,
+ 0x9807,
+ 0x9808,
+ 0x9809,
+ 0x980A,
+ 0x9900,
+ 0x9901,
+ 0x9903,
+ 0x9904,
+ 0x9905,
+ 0x9906,
+ 0x9907,
+ 0x9908,
+ 0x9909,
+ 0x990A,
+ 0x990B,
+ 0x990C,
+ 0x990D,
+ 0x990E,
+ 0x990F,
+ 0x9910,
+ 0x9913,
+ 0x9917,
+ 0x9918,
+ 0x9919,
+ 0x9990,
+ 0x9991,
+ 0x9992,
+ 0x9993,
+ 0x9994,
+ 0x9995,
+ 0x9996,
+ 0x9997,
+ 0x9998,
+ 0x9999,
+ 0x999A,
+ 0x999B,
+ 0x999C,
+ 0x999D,
+ 0x99A0,
+ 0x99A2,
+ 0x99A4,
+ /* radeon secondary ids */
+ 0x3171,
+ 0x3e70,
+ 0x4164,
+ 0x4165,
+ 0x4166,
+ 0x4168,
+ 0x4170,
+ 0x4171,
+ 0x4172,
+ 0x4173,
+ 0x496e,
+ 0x4a69,
+ 0x4a6a,
+ 0x4a6b,
+ 0x4a70,
+ 0x4a74,
+ 0x4b69,
+ 0x4b6b,
+ 0x4b6c,
+ 0x4c6e,
+ 0x4e64,
+ 0x4e65,
+ 0x4e66,
+ 0x4e67,
+ 0x4e68,
+ 0x4e69,
+ 0x4e6a,
+ 0x4e71,
+ 0x4f73,
+ 0x5569,
+ 0x556b,
+ 0x556d,
+ 0x556f,
+ 0x5571,
+ 0x5854,
+ 0x5874,
+ 0x5940,
+ 0x5941,
+ 0x5b70,
+ 0x5b72,
+ 0x5b73,
+ 0x5b74,
+ 0x5b75,
+ 0x5d44,
+ 0x5d45,
+ 0x5d6d,
+ 0x5d6f,
+ 0x5d72,
+ 0x5d77,
+ 0x5e6b,
+ 0x5e6d,
+ 0x7120,
+ 0x7124,
+ 0x7129,
+ 0x712e,
+ 0x712f,
+ 0x7162,
+ 0x7163,
+ 0x7166,
+ 0x7167,
+ 0x7172,
+ 0x7173,
+ 0x71a0,
+ 0x71a1,
+ 0x71a3,
+ 0x71a7,
+ 0x71bb,
+ 0x71e0,
+ 0x71e1,
+ 0x71e2,
+ 0x71e6,
+ 0x71e7,
+ 0x71f2,
+ 0x7269,
+ 0x726b,
+ 0x726e,
+ 0x72a0,
+ 0x72a8,
+ 0x72b1,
+ 0x72b3,
+ 0x793f,
+};
+
static const struct pci_device_id pciidlist[] = {
-#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -885,8 +1907,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
@@ -969,7 +1989,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
-#endif
/* topaz */
{0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
{0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
@@ -1088,6 +2107,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
/* Renoir */
+ {0x1002, 0x15E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
{0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
{0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
@@ -1101,43 +2121,283 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x73A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
- /* Van Gogh */
- {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU},
+ /* Yellow Carp */
+ {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+ {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
/* Navy_Flounder */
{0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
{0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
{0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
{0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
/* DIMGREY_CAVEFISH */
{0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73ED, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ /* Aldebaran */
+ {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+
+ /* CYAN_SKILLFISH */
+ {0x1002, 0x13DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13F9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+
+ /* BEIGE_GOBY */
+ {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_DISPLAY_VGA << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_DISPLAY_OTHER << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
{0, 0, 0}
};
MODULE_DEVICE_TABLE(pci, pciidlist);
+static const struct amdgpu_asic_type_quirk asic_type_quirks[] = {
+ /* differentiate between P10 and P11 asics with the same DID */
+ {0x67FF, 0xE3, CHIP_POLARIS10},
+ {0x67FF, 0xE7, CHIP_POLARIS10},
+ {0x67FF, 0xF3, CHIP_POLARIS10},
+ {0x67FF, 0xF7, CHIP_POLARIS10},
+};
+
static const struct drm_driver amdgpu_kms_driver;
+static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
+{
+ struct pci_dev *p = NULL;
+ int i;
+
+ /* 0 - GPU
+ * 1 - audio
+ * 2 - USB
+ * 3 - UCSI
+ */
+ for (i = 1; i < 4; i++) {
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, i);
+ if (p) {
+ pm_runtime_get_sync(&p->dev);
+ pm_runtime_put_autosuspend(&p->dev);
+ pci_dev_put(p);
+ }
+ }
+}
+
+static void amdgpu_init_debug_options(struct amdgpu_device *adev)
+{
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM) {
+ pr_info("debug: VM handling debug enabled\n");
+ adev->debug_vm = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_LARGEBAR) {
+ pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
+ adev->debug_largebar = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
+ pr_info("debug: soft reset for GPU recovery disabled\n");
+ adev->debug_disable_soft_recovery = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+ pr_info("debug: place fw in vram for frontdoor loading\n");
+ adev->debug_use_vram_fw_buf = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_RAS_ACA) {
+ pr_info("debug: enable RAS ACA\n");
+ adev->debug_enable_ras_aca = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_EXP_RESETS) {
+ pr_info("debug: enable experimental reset features\n");
+ adev->debug_exp_resets = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) {
+ pr_info("debug: ring reset disabled\n");
+ adev->debug_disable_gpu_ring_reset = true;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
+ pr_info("debug: use vram for smu pool\n");
+ adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) {
+ pr_info("debug: VM mode debug for userptr is enabled\n");
+ adev->debug_vm_userptr = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) {
+ pr_info("debug: disable kernel logs of correctable errors\n");
+ adev->debug_disable_ce_logs = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
+ pr_info("debug: allowing command submission to CE engine\n");
+ adev->debug_enable_ce_cs = true;
+ }
+}
+
+static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) {
+ if (pdev->device == asic_type_quirks[i].device &&
+ pdev->revision == asic_type_quirks[i].revision) {
+ flags &= ~AMD_ASIC_MASK;
+ flags |= asic_type_quirks[i].type;
+ break;
+ }
+ }
+
+ return flags;
+}
+
+static bool amdgpu_support_enabled(struct device *dev,
+ const enum amd_asic_type family)
+{
+ const char *gen;
+ const char *param;
+ int module_param = -1;
+ bool radeon_support_built = IS_ENABLED(CONFIG_DRM_RADEON);
+ bool amdgpu_support_built = false;
+ bool support_by_default = false;
+
+ switch (family) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ gen = "SI";
+ param = "si_support";
+ module_param = amdgpu_si_support;
+ amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_SI);
+ support_by_default = true;
+ break;
+
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ support_by_default = true;
+ fallthrough;
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ gen = "CIK";
+ param = "cik_support";
+ module_param = amdgpu_cik_support;
+ amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_CIK);
+ break;
+
+ default:
+ /* All other chips are supported by amdgpu only */
+ return true;
+ }
+
+ if (!amdgpu_support_built) {
+ dev_info(dev, "amdgpu built without %s support\n", gen);
+ return false;
+ }
+
+ if ((module_param == -1 && (support_by_default || !radeon_support_built)) ||
+ module_param == 1) {
+ if (radeon_support_built)
+ dev_info(dev, "%s support provided by amdgpu.\n"
+ "Use radeon.%s=1 amdgpu.%s=0 to override.\n",
+ gen, param, param);
+
+ return true;
+ }
+
+ if (radeon_support_built)
+ dev_info(dev, "%s support provided by radeon.\n"
+ "Use radeon.%s=0 amdgpu.%s=1 to override.\n",
+ gen, param, param);
+ else if (module_param == 0)
+ dev_info(dev, "%s support disabled by module param\n", gen);
+
+ return false;
+}
+
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct drm_device *ddev;
struct amdgpu_device *adev;
unsigned long flags = ent->driver_data;
- int ret, retry = 0;
+ int ret, retry = 0, i;
bool supports_atomic = false;
- if (!amdgpu_virtual_display &&
- amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ if (drm_firmware_drivers_only() && amdgpu_modeset == -1)
+ return -EINVAL;
+ }
+
+ /* skip devices which are owned by radeon */
+ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
+ if (amdgpu_unsupported_pciidlist[i] == pdev->device)
+ return -ENODEV;
+ }
+
+ if (amdgpu_aspm == -1 && !pcie_aspm_enabled(pdev))
+ amdgpu_aspm = 0;
+
+ if (amdgpu_virtual_display ||
+ amdgpu_device_asic_has_dc_support(pdev, flags & AMD_ASIC_MASK))
supports_atomic = true;
if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
@@ -1146,55 +2406,21 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
+ flags = amdgpu_fix_asic_type(pdev, flags);
+
/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
* however, SME requires an indirect IOMMU mapping because the encryption
* bit is beyond the DMA mask of the chip.
*/
- if (mem_encrypt_active() && ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
+ if (cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
+ ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
dev_info(&pdev->dev,
"SME is not compatible with RAVEN\n");
return -ENOTSUPP;
}
-#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
- dev_info(&pdev->dev,
- "SI support provided by radeon.\n");
- dev_info(&pdev->dev,
- "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- dev_info(&pdev->dev,
- "CIK support provided by radeon.\n");
- dev_info(&pdev->dev,
- "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-
- /* Get rid of things like offb */
- ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb");
- if (ret)
- return ret;
+ if (!amdgpu_support_enabled(&pdev->dev, flags & AMD_ASIC_MASK))
+ return -ENODEV;
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
if (IS_ERR(adev))
@@ -1213,12 +2439,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, ddev);
- ret = amdgpu_driver_load_kms(adev, ent->driver_data);
+ amdgpu_init_debug_options(adev);
+
+ ret = amdgpu_driver_load_kms(adev, flags);
if (ret)
goto err_pci;
retry_init:
- ret = drm_dev_register(ddev, ent->driver_data);
+ ret = drm_dev_register(ddev, flags);
if (ret == -EAGAIN && ++retry <= 3) {
DRM_INFO("retry init %d\n", retry);
/* Don't request EX mode too frequently which is attacking */
@@ -1228,10 +2456,78 @@ retry_init:
goto err_pci;
}
+ ret = amdgpu_xcp_dev_register(adev, ent);
+ if (ret)
+ goto err_pci;
+
+ ret = amdgpu_amdkfd_drm_client_create(adev);
+ if (ret)
+ goto err_pci;
+
+ /*
+ * 1. don't init fbdev on hw without DCE
+ * 2. don't init fbdev if there are no connectors
+ */
+ if (adev->mode_info.mode_config_initialized &&
+ !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) {
+ const struct drm_format_info *format;
+
+ /* select 8 bpp console on low vram cards */
+ if (adev->gmc.real_vram_size <= (32*1024*1024))
+ format = drm_format_info(DRM_FORMAT_C8);
+ else
+ format = NULL;
+
+ drm_client_setup(adev_to_drm(adev), format);
+ }
+
ret = amdgpu_debugfs_init(adev);
if (ret)
DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
+ /* only need to skip on ATPX */
+ if (amdgpu_device_supports_px(adev))
+ dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+ /* we want direct complete for BOCO */
+ if (amdgpu_device_supports_boco(adev))
+ dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE |
+ DPM_FLAG_SMART_SUSPEND |
+ DPM_FLAG_MAY_SKIP_RESUME);
+ pm_runtime_use_autosuspend(ddev->dev);
+ pm_runtime_set_autosuspend_delay(ddev->dev, 5000);
+
+ pm_runtime_allow(ddev->dev);
+
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ pci_wake_from_d3(pdev, TRUE);
+
+ /*
+ * For runpm implemented via BACO, PMFW will handle the
+ * timing for BACO in and out:
+ * - put ASIC into BACO state only when both video and
+ * audio functions are in D3 state.
+ * - pull ASIC out of BACO state when either video or
+ * audio function is in D0 state.
+ * Also, at startup, PMFW assumes both functions are in
+ * D0 state.
+ *
+ * So if snd driver was loaded prior to amdgpu driver
+ * and audio function was put into D3 state, there will
+ * be no PMFW-aware D-state transition(D0->D3) on runpm
+ * suspend. Thus the BACO will be not correctly kicked in.
+ *
+ * Via amdgpu_get_secondary_funcs(), the audio dev is put
+ * into D0 state. Then there will be a PMFW-aware D-state
+ * transition(D0->D3) on runpm suspend.
+ */
+ if (amdgpu_device_supports_baco(adev) &&
+ !(adev->flags & AMD_IS_APU) &&
+ adev->asic_type >= CHIP_NAVI10)
+ amdgpu_get_secondary_funcs(adev);
+ }
+
return 0;
err_pci:
@@ -1243,15 +2539,27 @@ static void
amdgpu_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
-#ifdef MODULE
- if (THIS_MODULE->state != MODULE_STATE_GOING)
-#endif
- DRM_ERROR("Hotplug removal is not supported\n");
+ amdgpu_ras_eeprom_check_and_recover(adev);
+ amdgpu_xcp_dev_unplug(adev);
+ amdgpu_gmc_prepare_nps_mode_change(adev);
drm_dev_unplug(dev);
+
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
+ pm_runtime_get_sync(dev->dev);
+ pm_runtime_forbid(dev->dev);
+ }
+
amdgpu_driver_unload_kms(dev);
+
+ /*
+ * Flush any in flight DMA operations from device.
+ * Clear the Bus Master Enable bit and then wait on the PCIe Device
+ * StatusTransactions Pending bit.
+ */
pci_disable_device(pdev);
- pci_set_drvdata(pdev, NULL);
+ pci_wait_for_pending_transaction(pdev);
}
static void
@@ -1263,6 +2571,10 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
if (amdgpu_ras_intr_triggered())
return;
+ /* device maybe not resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return;
+
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown.
* unfortunately we can't detect certain
@@ -1270,24 +2582,106 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
*/
if (!amdgpu_passthrough(adev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;
- adev->in_poweroff_reboot_com = true;
- amdgpu_device_ip_suspend(adev);
- adev->in_poweroff_reboot_com = false;
+ amdgpu_device_prepare(dev);
+ amdgpu_device_suspend(dev, true);
adev->mp1_state = PP_MP1_STATE_NONE;
}
+static int amdgpu_pmops_prepare(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* device maybe not resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
+
+ /* Return a positive number here so
+ * DPM_FLAG_SMART_SUSPEND works properly
+ */
+ if (amdgpu_device_supports_boco(adev) && pm_runtime_suspended(dev))
+ return 1;
+
+ /* if we will not support s3 or s2i for the device
+ * then skip suspend
+ */
+ if (!amdgpu_acpi_is_s0ix_active(adev) &&
+ !amdgpu_acpi_is_s3_active(adev))
+ return 1;
+
+ return amdgpu_device_prepare(drm_dev);
+}
+
+static void amdgpu_pmops_complete(struct device *dev)
+{
+ amdgpu_device_complete(dev_get_drvdata(dev));
+}
+
static int amdgpu_pmops_suspend(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = true;
+ else if (amdgpu_acpi_is_s3_active(adev))
+ adev->in_s3 = true;
+ if (!adev->in_s0ix && !adev->in_s3) {
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /* don't allow going deep first time followed by s2idle the next time */
+ if (adev->last_suspend_state != PM_SUSPEND_ON &&
+ adev->last_suspend_state != pm_suspend_target_state) {
+ drm_err_once(drm_dev, "Unsupported suspend state %d\n",
+ pm_suspend_target_state);
+ return -EINVAL;
+ }
+#endif
+ return 0;
+ }
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /* cache the state last used for suspend */
+ adev->last_suspend_state = pm_suspend_target_state;
+#endif
return amdgpu_device_suspend(drm_dev, true);
}
+static int amdgpu_pmops_suspend_noirq(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
+
+ if (amdgpu_acpi_should_gpu_reset(adev)) {
+ amdgpu_device_lock_reset_domain(adev->reset_domain);
+ r = amdgpu_asic_reset(adev);
+ amdgpu_device_unlock_reset_domain(adev->reset_domain);
+ return r;
+ }
+
+ return 0;
+}
+
static int amdgpu_pmops_resume(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
- return amdgpu_device_resume(drm_dev, true);
+ if (!adev->in_s0ix && !adev->in_s3)
+ return 0;
+
+ /* Avoids registers access if device is physically gone */
+ if (!pci_device_is_present(adev->pdev))
+ adev->no_hw_access = true;
+
+ r = amdgpu_device_resume(drm_dev, true);
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = false;
+ else
+ adev->in_s3 = false;
+ return r;
}
static int amdgpu_pmops_freeze(struct device *dev)
@@ -1296,18 +2690,23 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int r;
- adev->in_hibernate = true;
r = amdgpu_device_suspend(drm_dev, true);
- adev->in_hibernate = false;
if (r)
return r;
- return amdgpu_asic_reset(adev);
+
+ if (amdgpu_acpi_should_gpu_reset(adev))
+ return amdgpu_asic_reset(adev);
+ return 0;
}
static int amdgpu_pmops_thaw(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ /* do not resume device if it's normal hibernation */
+ if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend())
+ return 0;
+
return amdgpu_device_resume(drm_dev, true);
}
@@ -1315,12 +2714,12 @@ static int amdgpu_pmops_poweroff(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- int r;
- adev->in_poweroff_reboot_com = true;
- r = amdgpu_device_suspend(drm_dev, true);
- adev->in_poweroff_reboot_com = false;
- return r;
+ /* device maybe not resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
+
+ return amdgpu_device_suspend(drm_dev, true);
}
static int amdgpu_pmops_restore(struct device *dev)
@@ -1330,6 +2729,82 @@ static int amdgpu_pmops_restore(struct device *dev)
return amdgpu_device_resume(drm_dev, true);
}
+static int amdgpu_runtime_idle_check_display(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (adev->mode_info.num_crtc) {
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ int ret = 0;
+
+ if (amdgpu_runtime_pm != -2) {
+ /* XXX: Return busy if any displays are connected to avoid
+ * possible display wakeups after runtime resume due to
+ * hotplug events in case any displays were connected while
+ * the GPU was in suspend. Remove this once that is fixed.
+ */
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->status == connector_status_connected) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+
+ if (ret)
+ return ret;
+ }
+
+ if (adev->dc_enabled) {
+ struct drm_crtc *crtc;
+
+ drm_for_each_crtc(crtc, drm_dev) {
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state->active)
+ ret = -EBUSY;
+ drm_modeset_unlock(&crtc->mutex);
+ if (ret < 0)
+ break;
+ }
+ } else {
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
+
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->dpms == DRM_MODE_DPMS_ON) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+ }
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_runtime_idle_check_userq(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ return xa_empty(&adev->userq_doorbell_xa) ? 0 : -EBUSY;
+}
+
static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -1337,14 +2812,22 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret, i;
- if (!adev->runpm) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
return -EBUSY;
}
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ return ret;
+ ret = amdgpu_runtime_idle_check_userq(dev);
+ if (ret)
+ return ret;
+
/* wait for all rings to drain before suspending */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (ring && ring->sched.ready) {
ret = amdgpu_fence_wait_empty(ring);
if (ret)
@@ -1353,30 +2836,51 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
}
adev->in_runpm = true;
- if (amdgpu_device_supports_atpx(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ /*
+ * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some
+ * proper cleanups and put itself into a state ready for PNP. That
+ * can address some random resuming failure observed on BOCO capable
+ * platforms.
+ * TODO: this may be also needed for PX capable platform.
+ */
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
+
+ ret = amdgpu_device_prepare(drm_dev);
+ if (ret)
+ return ret;
ret = amdgpu_device_suspend(drm_dev, false);
if (ret) {
adev->in_runpm = false;
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_NONE;
return ret;
}
- if (amdgpu_device_supports_atpx(drm_dev)) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_NONE;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
- if (!amdgpu_is_atpx_hybrid()) {
- amdgpu_device_cache_pci_state(pdev);
- pci_disable_device(pdev);
- pci_ignore_hotplug(pdev);
- pci_set_power_state(pdev, PCI_D3cold);
- }
+ amdgpu_device_cache_pci_state(pdev);
+ pci_disable_device(pdev);
+ pci_ignore_hotplug(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
- } else if (amdgpu_device_supports_baco(drm_dev)) {
- amdgpu_device_baco_enter(drm_dev);
+ } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
+ /* nothing to do */
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
+ amdgpu_device_baco_enter(adev);
}
+ dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
+
return 0;
}
@@ -1387,33 +2891,42 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret;
- if (!adev->runpm)
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
return -EINVAL;
- if (amdgpu_device_supports_atpx(drm_dev)) {
+ /* Avoids registers access if device is physically gone */
+ if (!pci_device_is_present(adev->pdev))
+ adev->no_hw_access = true;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
- if (!amdgpu_is_atpx_hybrid()) {
- pci_set_power_state(pdev, PCI_D0);
- amdgpu_device_load_pci_state(pdev);
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
- }
+ pci_set_power_state(pdev, PCI_D0);
+ amdgpu_device_load_pci_state(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
pci_set_master(pdev);
- } else if (amdgpu_device_supports_boco(drm_dev)) {
+ } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
pci_set_master(pdev);
- } else if (amdgpu_device_supports_baco(drm_dev)) {
- amdgpu_device_baco_exit(drm_dev);
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
+ amdgpu_device_baco_exit(adev);
}
ret = amdgpu_device_resume(drm_dev, false);
- if (amdgpu_device_supports_atpx(drm_dev))
+ if (ret) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
+ pci_disable_device(pdev);
+ return ret;
+ }
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
adev->in_runpm = false;
return 0;
@@ -1423,55 +2936,38 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- /* we don't want the main rpm_idle to call suspend - we want to autosuspend */
- int ret = 1;
+ int ret;
- if (!adev->runpm) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
return -EBUSY;
}
- if (amdgpu_device_has_dc_support(adev)) {
- struct drm_crtc *crtc;
-
- drm_modeset_lock_all(drm_dev);
-
- drm_for_each_crtc(crtc, drm_dev) {
- if (crtc->state->active) {
- ret = -EBUSY;
- break;
- }
- }
-
- drm_modeset_unlock_all(drm_dev);
-
- } else {
- struct drm_connector *list_connector;
- struct drm_connector_list_iter iter;
-
- mutex_lock(&drm_dev->mode_config.mutex);
- drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
-
- drm_connector_list_iter_begin(drm_dev, &iter);
- drm_for_each_connector_iter(list_connector, &iter) {
- if (list_connector->dpms == DRM_MODE_DPMS_ON) {
- ret = -EBUSY;
- break;
- }
- }
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ goto done;
- drm_connector_list_iter_end(&iter);
+ ret = amdgpu_runtime_idle_check_userq(dev);
+done:
+ pm_runtime_autosuspend(dev);
+ return ret;
+}
- drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
- mutex_unlock(&drm_dev->mode_config.mutex);
+static int amdgpu_drm_release(struct inode *inode, struct file *filp)
+{
+ struct drm_file *file_priv = filp->private_data;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct drm_device *dev = file_priv->minor->dev;
+ int idx;
+
+ if (fpriv && drm_dev_enter(dev, &idx)) {
+ fpriv->evf_mgr.fd_closing = true;
+ amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
+ amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
+ drm_dev_exit(idx);
}
- if (ret == -EBUSY)
- DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
-
- pm_runtime_mark_last_busy(dev);
- pm_runtime_autosuspend(dev);
- return ret;
+ return drm_release(inode, filp);
}
long amdgpu_drm_ioctl(struct file *filp,
@@ -1480,6 +2976,7 @@ long amdgpu_drm_ioctl(struct file *filp,
struct drm_file *file_priv = filp->private_data;
struct drm_device *dev;
long ret;
+
dev = file_priv->minor->dev;
ret = pm_runtime_get_sync(dev->dev);
if (ret < 0)
@@ -1487,19 +2984,21 @@ long amdgpu_drm_ioctl(struct file *filp,
ret = drm_ioctl(filp, cmd, arg);
- pm_runtime_mark_last_busy(dev->dev);
out:
pm_runtime_put_autosuspend(dev->dev);
return ret;
}
static const struct dev_pm_ops amdgpu_pm_ops = {
- .suspend = amdgpu_pmops_suspend,
- .resume = amdgpu_pmops_resume,
- .freeze = amdgpu_pmops_freeze,
- .thaw = amdgpu_pmops_thaw,
- .poweroff = amdgpu_pmops_poweroff,
- .restore = amdgpu_pmops_restore,
+ .prepare = pm_sleep_ptr(amdgpu_pmops_prepare),
+ .complete = pm_sleep_ptr(amdgpu_pmops_complete),
+ .suspend = pm_sleep_ptr(amdgpu_pmops_suspend),
+ .suspend_noirq = pm_sleep_ptr(amdgpu_pmops_suspend_noirq),
+ .resume = pm_sleep_ptr(amdgpu_pmops_resume),
+ .freeze = pm_sleep_ptr(amdgpu_pmops_freeze),
+ .thaw = pm_sleep_ptr(amdgpu_pmops_thaw),
+ .poweroff = pm_sleep_ptr(amdgpu_pmops_poweroff),
+ .restore = pm_sleep_ptr(amdgpu_pmops_restore),
.runtime_suspend = amdgpu_pmops_runtime_suspend,
.runtime_resume = amdgpu_pmops_runtime_resume,
.runtime_idle = amdgpu_pmops_runtime_idle,
@@ -1521,14 +3020,18 @@ static const struct file_operations amdgpu_driver_kms_fops = {
.owner = THIS_MODULE,
.open = drm_open,
.flush = amdgpu_flush,
- .release = drm_release,
+ .release = amdgpu_drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
- .mmap = amdgpu_mmap,
+ .mmap = drm_gem_mmap,
.poll = drm_poll,
.read = drm_read,
#ifdef CONFIG_COMPAT
.compat_ioctl = amdgpu_kms_compat_ioctl,
#endif
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
@@ -1538,9 +3041,8 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
if (!filp)
return -EINVAL;
- if (filp->f_op != &amdgpu_driver_kms_fops) {
+ if (filp->f_op != &amdgpu_driver_kms_fops)
return -EINVAL;
- }
file = filp->private_data;
*fpriv = file->driver_priv;
@@ -1565,6 +3067,10 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
static const struct drm_driver amdgpu_kms_driver = {
@@ -1575,22 +3081,44 @@ static const struct drm_driver amdgpu_kms_driver = {
DRIVER_SYNCOBJ_TIMELINE,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
- .lastclose = amdgpu_driver_lastclose_kms,
- .irq_handler = amdgpu_irq_handler,
.ioctls = amdgpu_ioctls_kms,
.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
+ .fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = amdgpu_show_fdinfo,
+#endif
+
+ .gem_prime_import = amdgpu_gem_prime_import,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .major = KMS_DRIVER_MAJOR,
+ .minor = KMS_DRIVER_MINOR,
+ .patchlevel = KMS_DRIVER_PATCHLEVEL,
+};
+
+const struct drm_driver amdgpu_partition_driver = {
+ .driver_features =
+ DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
+ .open = amdgpu_driver_open_kms,
+ .postclose = amdgpu_driver_postclose_kms,
+ .ioctls = amdgpu_ioctls_kms,
+ .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
+ .dumb_create = amdgpu_mode_dumb_create,
+ .dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
.fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
- .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
- .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = amdgpu_gem_prime_import,
- .gem_prime_mmap = amdgpu_gem_prime_mmap,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -1603,39 +3131,49 @@ static struct pci_error_handlers amdgpu_pci_err_handler = {
.resume = amdgpu_pci_resume,
};
+static const struct attribute_group *amdgpu_sysfs_groups[] = {
+ &amdgpu_vram_mgr_attr_group,
+ &amdgpu_gtt_mgr_attr_group,
+ &amdgpu_flash_attr_group,
+ NULL,
+};
+
static struct pci_driver amdgpu_kms_pci_driver = {
.name = DRIVER_NAME,
.id_table = pciidlist,
.probe = amdgpu_pci_probe,
.remove = amdgpu_pci_remove,
.shutdown = amdgpu_pci_shutdown,
- .driver.pm = &amdgpu_pm_ops,
+ .driver.pm = pm_ptr(&amdgpu_pm_ops),
.err_handler = &amdgpu_pci_err_handler,
+ .dev_groups = amdgpu_sysfs_groups,
};
static int __init amdgpu_init(void)
{
int r;
- if (vgacon_text_force()) {
- DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
- return -EINVAL;
- }
-
r = amdgpu_sync_init();
if (r)
goto error_sync;
- r = amdgpu_fence_slab_init();
+ r = amdgpu_userq_fence_slab_init();
if (r)
goto error_fence;
DRM_INFO("amdgpu kernel modesetting enabled.\n");
amdgpu_register_atpx_handler();
+ amdgpu_acpi_detect();
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
+ if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) {
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ pr_crit("Overdrive is enabled, please disable it before "
+ "reporting any bugs unrelated to overdrive.\n");
+ }
+
/* let modprobe override vga console setting */
return pci_register_driver(&amdgpu_kms_pci_driver);
@@ -1651,9 +3189,11 @@ static void __exit amdgpu_exit(void)
amdgpu_amdkfd_fini();
pci_unregister_driver(&amdgpu_kms_pci_driver);
amdgpu_unregister_atpx_handler();
+ amdgpu_acpi_release();
amdgpu_sync_fini();
- amdgpu_fence_slab_fini();
+ amdgpu_userq_fence_slab_fini();
mmu_notifier_synchronize();
+ amdgpu_xcp_drv_release();
}
module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
index e3a4f7048042..2d86cc6f7f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
@@ -40,9 +40,13 @@
#define DRIVER_NAME "amdgpu"
#define DRIVER_DESC "AMD GPU"
-#define DRIVER_DATE "20150101"
+
+extern const struct drm_driver amdgpu_partition_driver;
long amdgpu_drm_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
+long amdgpu_kms_compat_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
new file mode 100644
index 000000000000..8cd69836dd99
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_eeprom.h"
+#include "amdgpu.h"
+
+/* AT24CM02 and M24M02-R have a 256-byte write page size.
+ */
+#define EEPROM_PAGE_BITS 8
+#define EEPROM_PAGE_SIZE (1U << EEPROM_PAGE_BITS)
+#define EEPROM_PAGE_MASK (EEPROM_PAGE_SIZE - 1)
+
+#define EEPROM_OFFSET_SIZE 2
+
+/* EEPROM memory addresses are 19-bits long, which can
+ * be partitioned into 3, 8, 8 bits, for a total of 19.
+ * The upper 3 bits are sent as part of the 7-bit
+ * "Device Type Identifier"--an I2C concept, which for EEPROM devices
+ * is hard-coded as 1010b, indicating that it is an EEPROM
+ * device--this is the wire format, followed by the upper
+ * 3 bits of the 19-bit address, followed by the direction,
+ * followed by two bytes holding the rest of the 16-bits of
+ * the EEPROM memory address. The format on the wire for EEPROM
+ * devices is: 1010XYZD, A15:A8, A7:A0,
+ * Where D is the direction and sequenced out by the hardware.
+ * Bits XYZ are memory address bits 18, 17 and 16.
+ * These bits are compared to how pins 1-3 of the part are connected,
+ * depending on the size of the part, more on that later.
+ *
+ * Note that of this wire format, a client is in control
+ * of, and needs to specify only XYZ, A15:A8, A7:0, bits,
+ * which is exactly the EEPROM memory address, or offset,
+ * in order to address up to 8 EEPROM devices on the I2C bus.
+ *
+ * For instance, a 2-Mbit I2C EEPROM part, addresses all its bytes,
+ * using an 18-bit address, bit 17 to 0 and thus would use all but one bit of
+ * the 19 bits previously mentioned. The designer would then not connect
+ * pins 1 and 2, and pin 3 usually named "A_2" or "E2", would be connected to
+ * either Vcc or GND. This would allow for up to two 2-Mbit parts on
+ * the same bus, where one would be addressable with bit 18 as 1, and
+ * the other with bit 18 of the address as 0.
+ *
+ * For a 2-Mbit part, bit 18 is usually known as the "Chip Enable" or
+ * "Hardware Address Bit". This bit is compared to the load on pin 3
+ * of the device, described above, and if there is a match, then this
+ * device responds to the command. This way, you can connect two
+ * 2-Mbit EEPROM devices on the same bus, but see one contiguous
+ * memory from 0 to 7FFFFh, where address 0 to 3FFFF is in the device
+ * whose pin 3 is connected to GND, and address 40000 to 7FFFFh is in
+ * the 2nd device, whose pin 3 is connected to Vcc.
+ *
+ * This addressing you encode in the 32-bit "eeprom_addr" below,
+ * namely the 19-bits "XYZ,A15:A0", as a single 19-bit address. For
+ * instance, eeprom_addr = 0x6DA01, is 110_1101_1010_0000_0001, where
+ * XYZ=110b, and A15:A0=DA01h. The XYZ bits become part of the device
+ * address, and the rest of the address bits are sent as the memory
+ * address bytes.
+ *
+ * That is, for an I2C EEPROM driver everything is controlled by
+ * the "eeprom_addr".
+ *
+ * See also top of amdgpu_ras_eeprom.c.
+ *
+ * P.S. If you need to write, lock and read the Identification Page,
+ * (M24M02-DR device only, which we do not use), change the "7" to
+ * "0xF" in the macro below, and let the client set bit 20 to 1 in
+ * "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to
+ * 1 to lock it permanently.
+ */
+#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))
+
+static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
+ u8 *eeprom_buf, u32 buf_size, bool read)
+{
+ u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE];
+ struct i2c_msg msgs[] = {
+ {
+ .flags = 0,
+ .len = EEPROM_OFFSET_SIZE,
+ .buf = eeprom_offset_buf,
+ },
+ {
+ .flags = read ? I2C_M_RD : 0,
+ },
+ };
+ const u8 *p = eeprom_buf;
+ int r;
+ u16 len;
+
+ for (r = 0; buf_size > 0;
+ buf_size -= len, eeprom_addr += len, eeprom_buf += len) {
+ /* Set the EEPROM address we want to write to/read from.
+ */
+ msgs[0].addr = MAKE_I2C_ADDR(eeprom_addr);
+ msgs[1].addr = msgs[0].addr;
+ msgs[0].buf[0] = (eeprom_addr >> 8) & 0xff;
+ msgs[0].buf[1] = eeprom_addr & 0xff;
+
+ if (!read) {
+ /* Write the maximum amount of data, without
+ * crossing the device's page boundary, as per
+ * its spec. Partial page writes are allowed,
+ * starting at any location within the page,
+ * so long as the page boundary isn't crossed
+ * over (actually the page pointer rolls
+ * over).
+ *
+ * As per the AT24CM02 EEPROM spec, after
+ * writing into a page, the I2C driver should
+ * terminate the transfer, i.e. in
+ * "i2c_transfer()" below, with a STOP
+ * condition, so that the self-timed write
+ * cycle begins. This is implied for the
+ * "i2c_transfer()" abstraction.
+ */
+ len = min(EEPROM_PAGE_SIZE - (eeprom_addr & EEPROM_PAGE_MASK),
+ buf_size);
+ } else {
+ /* Reading from the EEPROM has no limitation
+ * on the number of bytes read from the EEPROM
+ * device--they are simply sequenced out.
+ * Keep in mind that i2c_msg.len is u16 type.
+ */
+ len = min(U16_MAX, buf_size);
+ }
+ msgs[1].len = len;
+ msgs[1].buf = eeprom_buf;
+
+ /* This constitutes a START-STOP transaction.
+ */
+ r = i2c_transfer(i2c_adap, msgs, ARRAY_SIZE(msgs));
+ if (r != ARRAY_SIZE(msgs))
+ break;
+
+ if (!read) {
+ /* According to EEPROM specs the length of the
+ * self-writing cycle, tWR (tW), is 10 ms.
+ *
+ * TODO: Use polling on ACK, aka Acknowledge
+ * Polling, to minimize waiting for the
+ * internal write cycle to complete, as it is
+ * usually smaller than tWR (tW).
+ */
+ msleep(10);
+ }
+ }
+
+ return r < 0 ? r : eeprom_buf - p;
+}
+
+/**
+ * amdgpu_eeprom_xfer -- Read/write from/to an I2C EEPROM device
+ * @i2c_adap: pointer to the I2C adapter to use
+ * @eeprom_addr: EEPROM address from which to read/write
+ * @eeprom_buf: pointer to data buffer to read into/write from
+ * @buf_size: the size of @eeprom_buf
+ * @read: True if reading from the EEPROM, false if writing
+ *
+ * Returns the number of bytes read/written; -errno on error.
+ */
+static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
+ u8 *eeprom_buf, u32 buf_size, bool read)
+{
+ const struct i2c_adapter_quirks *quirks = i2c_adap->quirks;
+ u16 limit;
+ u16 ps; /* Partial size */
+ int res = 0, r;
+
+ if (!quirks)
+ limit = 0;
+ else if (read)
+ limit = quirks->max_read_len;
+ else
+ limit = quirks->max_write_len;
+
+ if (limit == 0) {
+ return __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr,
+ eeprom_buf, buf_size, read);
+ } else if (limit <= EEPROM_OFFSET_SIZE) {
+ dev_err_ratelimited(&i2c_adap->dev,
+ "maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d",
+ eeprom_addr, buf_size,
+ str_read_write(read), EEPROM_OFFSET_SIZE);
+ return -EINVAL;
+ }
+
+ /* The "limit" includes all data bytes sent/received,
+ * which would include the EEPROM_OFFSET_SIZE bytes.
+ * Account for them here.
+ */
+ limit -= EEPROM_OFFSET_SIZE;
+ for ( ; buf_size > 0;
+ buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) {
+ ps = min(limit, buf_size);
+
+ r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr,
+ eeprom_buf, ps, read);
+ if (r < 0)
+ return r;
+ res += r;
+ }
+
+ return res;
+}
+
+int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u32 bytes)
+{
+ return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
+ true);
+}
+
+int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u32 bytes)
+{
+ return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
+ false);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
new file mode 100644
index 000000000000..8083b8253ef4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_EEPROM_H
+#define _AMDGPU_EEPROM_H
+
+#include <linux/i2c.h>
+
+int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u32 bytes);
+
+int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u32 bytes);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index af4ef84e27a7..3aaeed2d3562 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -24,7 +24,6 @@
* Alex Deucher
*/
-#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
@@ -71,6 +70,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
DRM_DEBUG_KMS("setting active device to %08x from %08x %08x for encoder %d\n",
amdgpu_encoder->active_device, amdgpu_encoder->devices,
@@ -166,12 +166,12 @@ void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
{
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
- unsigned hblank = native_mode->htotal - native_mode->hdisplay;
- unsigned vblank = native_mode->vtotal - native_mode->vdisplay;
- unsigned hover = native_mode->hsync_start - native_mode->hdisplay;
- unsigned vover = native_mode->vsync_start - native_mode->vdisplay;
- unsigned hsync_width = native_mode->hsync_end - native_mode->hsync_start;
- unsigned vsync_width = native_mode->vsync_end - native_mode->vsync_start;
+ unsigned int hblank = native_mode->htotal - native_mode->hdisplay;
+ unsigned int vblank = native_mode->vtotal - native_mode->vdisplay;
+ unsigned int hover = native_mode->hsync_start - native_mode->hdisplay;
+ unsigned int vover = native_mode->vsync_start - native_mode->vdisplay;
+ unsigned int hsync_width = native_mode->hsync_end - native_mode->hsync_start;
+ unsigned int vsync_width = native_mode->vsync_end - native_mode->vsync_start;
adjusted_mode->clock = native_mode->clock;
adjusted_mode->flags = native_mode->flags;
@@ -222,7 +222,7 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
case DRM_MODE_CONNECTOR_HDMIB:
if (amdgpu_connector->use_digital) {
/* HDMI 1.3 supports up to 340 Mhz over single link */
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
if (pixel_clock > 340000)
return true;
else
@@ -244,7 +244,7 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
return false;
else {
/* HDMI 1.3 supports up to 340 Mhz over single link */
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
if (pixel_clock > 340000)
return true;
else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
new file mode 100644
index 000000000000..23d7d0b0d625
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/sched.h>
+#include <drm/drm_exec.h>
+#include "amdgpu.h"
+
+#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
+#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
+
+static const char *
+amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu_eviction_fence";
+}
+
+static const char *
+amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_eviction_fence *ef;
+
+ ef = container_of(f, struct amdgpu_eviction_fence, base);
+ return ef->timeline_name;
+}
+
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec)
+{
+ struct amdgpu_eviction_fence *old_ef, *new_ef;
+ struct drm_gem_object *obj;
+ unsigned long index;
+ int ret;
+
+ if (evf_mgr->ev_fence &&
+ !dma_fence_is_signaled(&evf_mgr->ev_fence->base))
+ return 0;
+ /*
+ * Steps to replace eviction fence:
+ * * lock all objects in exec (caller)
+ * * create a new eviction fence
+ * * update new eviction fence in evf_mgr
+ * * attach the new eviction fence to BOs
+ * * release the old fence
+ * * unlock the objects (caller)
+ */
+ new_ef = amdgpu_eviction_fence_create(evf_mgr);
+ if (!new_ef) {
+ DRM_ERROR("Failed to create new eviction fence\n");
+ return -ENOMEM;
+ }
+
+ /* Update the eviction fence now */
+ spin_lock(&evf_mgr->ev_fence_lock);
+ old_ef = evf_mgr->ev_fence;
+ evf_mgr->ev_fence = new_ef;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ /* Attach the new fence */
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ if (!bo)
+ continue;
+ ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
+ if (ret) {
+ DRM_ERROR("Failed to attch new eviction fence\n");
+ goto free_err;
+ }
+ }
+
+ /* Free old fence */
+ if (old_ef)
+ dma_fence_put(&old_ef->base);
+ return 0;
+
+free_err:
+ kfree(new_ef);
+ return ret;
+}
+
+static void
+amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
+{
+ struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
+ struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_eviction_fence *ev_fence;
+
+ mutex_lock(&uq_mgr->userq_mutex);
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ if (ev_fence)
+ dma_fence_get(&ev_fence->base);
+ else
+ goto unlock;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ amdgpu_userq_evict(uq_mgr, ev_fence);
+
+ mutex_unlock(&uq_mgr->userq_mutex);
+ dma_fence_put(&ev_fence->base);
+ return;
+
+unlock:
+ spin_unlock(&evf_mgr->ev_fence_lock);
+ mutex_unlock(&uq_mgr->userq_mutex);
+}
+
+static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
+{
+ struct amdgpu_eviction_fence_mgr *evf_mgr;
+ struct amdgpu_eviction_fence *ev_fence;
+
+ if (!f)
+ return true;
+
+ ev_fence = to_ev_fence(f);
+ evf_mgr = ev_fence->evf_mgr;
+
+ schedule_delayed_work(&evf_mgr->suspend_work, 0);
+ return true;
+}
+
+static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
+ .get_driver_name = amdgpu_eviction_fence_get_driver_name,
+ .get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
+ .enable_signaling = amdgpu_eviction_fence_enable_signaling,
+};
+
+void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence)
+{
+ spin_lock(&evf_mgr->ev_fence_lock);
+ dma_fence_signal(&ev_fence->base);
+ spin_unlock(&evf_mgr->ev_fence_lock);
+}
+
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+ ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
+ if (!ev_fence)
+ return NULL;
+
+ ev_fence->evf_mgr = evf_mgr;
+ get_task_comm(ev_fence->timeline_name, current);
+ spin_lock_init(&ev_fence->lock);
+ dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops,
+ &ev_fence->lock, evf_mgr->ev_fence_ctx,
+ atomic_inc_return(&evf_mgr->ev_fence_seq));
+ return ev_fence;
+}
+
+void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+ /* Wait for any pending work to execute */
+ flush_delayed_work(&evf_mgr->suspend_work);
+
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ if (!ev_fence)
+ return;
+
+ dma_fence_wait(&ev_fence->base, false);
+
+ /* Last unref of ev_fence */
+ dma_fence_put(&ev_fence->base);
+}
+
+int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+ struct dma_resv *resv = bo->tbo.base.resv;
+ int ret;
+
+ if (!resv)
+ return 0;
+
+ ret = dma_resv_reserve_fences(resv, 1);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Failed to resv fence space\n");
+ return ret;
+ }
+
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ if (ev_fence)
+ dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ return 0;
+}
+
+void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
+{
+ struct dma_fence *stub = dma_fence_get_stub();
+
+ dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx,
+ stub, DMA_RESV_USAGE_BOOKKEEP);
+ dma_fence_put(stub);
+}
+
+int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ /* This needs to be done one time per open */
+ atomic_set(&evf_mgr->ev_fence_seq, 0);
+ evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
+ spin_lock_init(&evf_mgr->ev_fence_lock);
+
+ INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
new file mode 100644
index 000000000000..fcd867b7147d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_EV_FENCE_H_
+#define AMDGPU_EV_FENCE_H_
+
+struct amdgpu_eviction_fence {
+ struct dma_fence base;
+ spinlock_t lock;
+ char timeline_name[TASK_COMM_LEN];
+ struct amdgpu_eviction_fence_mgr *evf_mgr;
+};
+
+struct amdgpu_eviction_fence_mgr {
+ u64 ev_fence_ctx;
+ atomic_t ev_fence_seq;
+ spinlock_t ev_fence_lock;
+ struct amdgpu_eviction_fence *ev_fence;
+ struct delayed_work suspend_work;
+ uint8_t fd_closing;
+};
+
+/* Eviction fence helper functions */
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+int
+amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+void
+amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+int
+amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
deleted file mode 100644
index 51cd49c6f38f..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright © 2007 David Airlie
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * David Airlie
- */
-
-#include <linux/module.h>
-#include <linux/pm_runtime.h>
-#include <linux/slab.h>
-#include <linux/vga_switcheroo.h>
-
-#include <drm/amdgpu_drm.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_fourcc.h>
-
-#include "amdgpu.h"
-#include "cikd.h"
-#include "amdgpu_gem.h"
-
-#include "amdgpu_display.h"
-
-/* object hierarchy -
- this contains a helper + a amdgpu fb
- the helper contains a pointer to amdgpu framebuffer baseclass.
-*/
-
-static int
-amdgpufb_open(struct fb_info *info, int user)
-{
- struct drm_fb_helper *fb_helper = info->par;
- int ret = pm_runtime_get_sync(fb_helper->dev->dev);
- if (ret < 0 && ret != -EACCES) {
- pm_runtime_mark_last_busy(fb_helper->dev->dev);
- pm_runtime_put_autosuspend(fb_helper->dev->dev);
- return ret;
- }
- return 0;
-}
-
-static int
-amdgpufb_release(struct fb_info *info, int user)
-{
- struct drm_fb_helper *fb_helper = info->par;
-
- pm_runtime_mark_last_busy(fb_helper->dev->dev);
- pm_runtime_put_autosuspend(fb_helper->dev->dev);
- return 0;
-}
-
-static const struct fb_ops amdgpufb_ops = {
- .owner = THIS_MODULE,
- DRM_FB_HELPER_DEFAULT_OPS,
- .fb_open = amdgpufb_open,
- .fb_release = amdgpufb_release,
- .fb_fillrect = drm_fb_helper_cfb_fillrect,
- .fb_copyarea = drm_fb_helper_cfb_copyarea,
- .fb_imageblit = drm_fb_helper_cfb_imageblit,
-};
-
-
-int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int cpp, bool tiled)
-{
- int aligned = width;
- int pitch_mask = 0;
-
- switch (cpp) {
- case 1:
- pitch_mask = 255;
- break;
- case 2:
- pitch_mask = 127;
- break;
- case 3:
- case 4:
- pitch_mask = 63;
- break;
- }
-
- aligned += pitch_mask;
- aligned &= ~pitch_mask;
- return aligned * cpp;
-}
-
-static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj)
-{
- struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
- int ret;
-
- ret = amdgpu_bo_reserve(abo, true);
- if (likely(ret == 0)) {
- amdgpu_bo_kunmap(abo);
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
- drm_gem_object_put(gobj);
-}
-
-static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object **gobj_p)
-{
- const struct drm_format_info *info;
- struct amdgpu_device *adev = rfbdev->adev;
- struct drm_gem_object *gobj = NULL;
- struct amdgpu_bo *abo = NULL;
- bool fb_tiled = false; /* useful for testing */
- u32 tiling_flags = 0, domain;
- int ret;
- int aligned_size, size;
- int height = mode_cmd->height;
- u32 cpp;
- u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED;
-
- info = drm_get_format_info(adev_to_drm(adev), mode_cmd);
- cpp = info->cpp[0];
-
- /* need to align pitch with crtc limits */
- mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
- fb_tiled);
- domain = amdgpu_display_supported_domains(adev, flags);
- height = ALIGN(mode_cmd->height, 8);
- size = mode_cmd->pitches[0] * height;
- aligned_size = ALIGN(size, PAGE_SIZE);
- ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags,
- ttm_bo_type_kernel, NULL, &gobj);
- if (ret) {
- pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
- return -ENOMEM;
- }
- abo = gem_to_amdgpu_bo(gobj);
-
- if (fb_tiled)
- tiling_flags = AMDGPU_TILING_SET(ARRAY_MODE, GRPH_ARRAY_2D_TILED_THIN1);
-
- ret = amdgpu_bo_reserve(abo, false);
- if (unlikely(ret != 0))
- goto out_unref;
-
- if (tiling_flags) {
- ret = amdgpu_bo_set_tiling_flags(abo,
- tiling_flags);
- if (ret)
- dev_err(adev->dev, "FB failed to set tiling flags\n");
- }
-
- ret = amdgpu_bo_pin(abo, domain);
- if (ret) {
- amdgpu_bo_unreserve(abo);
- goto out_unref;
- }
-
- ret = amdgpu_ttm_alloc_gart(&abo->tbo);
- if (ret) {
- amdgpu_bo_unreserve(abo);
- dev_err(adev->dev, "%p bind failed\n", abo);
- goto out_unref;
- }
-
- ret = amdgpu_bo_kmap(abo, NULL);
- amdgpu_bo_unreserve(abo);
- if (ret) {
- goto out_unref;
- }
-
- *gobj_p = gobj;
- return 0;
-out_unref:
- amdgpufb_destroy_pinned_object(gobj);
- *gobj_p = NULL;
- return ret;
-}
-
-static int amdgpufb_create(struct drm_fb_helper *helper,
- struct drm_fb_helper_surface_size *sizes)
-{
- struct amdgpu_fbdev *rfbdev = (struct amdgpu_fbdev *)helper;
- struct amdgpu_device *adev = rfbdev->adev;
- struct fb_info *info;
- struct drm_framebuffer *fb = NULL;
- struct drm_mode_fb_cmd2 mode_cmd;
- struct drm_gem_object *gobj = NULL;
- struct amdgpu_bo *abo = NULL;
- int ret;
- unsigned long tmp;
-
- memset(&mode_cmd, 0, sizeof(mode_cmd));
- mode_cmd.width = sizes->surface_width;
- mode_cmd.height = sizes->surface_height;
-
- if (sizes->surface_bpp == 24)
- sizes->surface_bpp = 32;
-
- mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
- sizes->surface_depth);
-
- ret = amdgpufb_create_pinned_object(rfbdev, &mode_cmd, &gobj);
- if (ret) {
- DRM_ERROR("failed to create fbcon object %d\n", ret);
- return ret;
- }
-
- abo = gem_to_amdgpu_bo(gobj);
-
- /* okay we have an object now allocate the framebuffer */
- info = drm_fb_helper_alloc_fbi(helper);
- if (IS_ERR(info)) {
- ret = PTR_ERR(info);
- goto out;
- }
-
- ret = amdgpu_display_framebuffer_init(adev_to_drm(adev), &rfbdev->rfb,
- &mode_cmd, gobj);
- if (ret) {
- DRM_ERROR("failed to initialize framebuffer %d\n", ret);
- goto out;
- }
-
- fb = &rfbdev->rfb.base;
-
- /* setup helper */
- rfbdev->helper.fb = fb;
-
- info->fbops = &amdgpufb_ops;
-
- tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start;
- info->fix.smem_start = adev->gmc.aper_base + tmp;
- info->fix.smem_len = amdgpu_bo_size(abo);
- info->screen_base = amdgpu_bo_kptr(abo);
- info->screen_size = amdgpu_bo_size(abo);
-
- drm_fb_helper_fill_info(info, &rfbdev->helper, sizes);
-
- /* setup aperture base/size for vesafb takeover */
- info->apertures->ranges[0].base = adev_to_drm(adev)->mode_config.fb_base;
- info->apertures->ranges[0].size = adev->gmc.aper_size;
-
- /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
-
- if (info->screen_base == NULL) {
- ret = -ENOSPC;
- goto out;
- }
-
- DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start);
- DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base);
- DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo));
- DRM_INFO("fb depth is %d\n", fb->format->depth);
- DRM_INFO(" pitch is %d\n", fb->pitches[0]);
-
- vga_switcheroo_client_fb_set(adev->pdev, info);
- return 0;
-
-out:
- if (abo) {
-
- }
- if (fb && ret) {
- drm_gem_object_put(gobj);
- drm_framebuffer_unregister_private(fb);
- drm_framebuffer_cleanup(fb);
- kfree(fb);
- }
- return ret;
-}
-
-static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev)
-{
- struct amdgpu_framebuffer *rfb = &rfbdev->rfb;
-
- drm_fb_helper_unregister_fbi(&rfbdev->helper);
-
- if (rfb->base.obj[0]) {
- amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
- rfb->base.obj[0] = NULL;
- drm_framebuffer_unregister_private(&rfb->base);
- drm_framebuffer_cleanup(&rfb->base);
- }
- drm_fb_helper_fini(&rfbdev->helper);
-
- return 0;
-}
-
-static const struct drm_fb_helper_funcs amdgpu_fb_helper_funcs = {
- .fb_probe = amdgpufb_create,
-};
-
-int amdgpu_fbdev_init(struct amdgpu_device *adev)
-{
- struct amdgpu_fbdev *rfbdev;
- int bpp_sel = 32;
- int ret;
-
- /* don't init fbdev on hw without DCE */
- if (!adev->mode_info.mode_config_initialized)
- return 0;
-
- /* don't init fbdev if there are no connectors */
- if (list_empty(&adev_to_drm(adev)->mode_config.connector_list))
- return 0;
-
- /* select 8 bpp console on low vram cards */
- if (adev->gmc.real_vram_size <= (32*1024*1024))
- bpp_sel = 8;
-
- rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL);
- if (!rfbdev)
- return -ENOMEM;
-
- rfbdev->adev = adev;
- adev->mode_info.rfbdev = rfbdev;
-
- drm_fb_helper_prepare(adev_to_drm(adev), &rfbdev->helper,
- &amdgpu_fb_helper_funcs);
-
- ret = drm_fb_helper_init(adev_to_drm(adev), &rfbdev->helper);
- if (ret) {
- kfree(rfbdev);
- return ret;
- }
-
- /* disable all the possible outputs/crtcs before entering KMS mode */
- if (!amdgpu_device_has_dc_support(adev))
- drm_helper_disable_unused_functions(adev_to_drm(adev));
-
- drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel);
- return 0;
-}
-
-void amdgpu_fbdev_fini(struct amdgpu_device *adev)
-{
- if (!adev->mode_info.rfbdev)
- return;
-
- amdgpu_fbdev_destroy(adev_to_drm(adev), adev->mode_info.rfbdev);
- kfree(adev->mode_info.rfbdev);
- adev->mode_info.rfbdev = NULL;
-}
-
-void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state)
-{
- if (adev->mode_info.rfbdev)
- drm_fb_helper_set_suspend_unlocked(&adev->mode_info.rfbdev->helper,
- state);
-}
-
-int amdgpu_fbdev_total_size(struct amdgpu_device *adev)
-{
- struct amdgpu_bo *robj;
- int size = 0;
-
- if (!adev->mode_info.rfbdev)
- return 0;
-
- robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]);
- size += amdgpu_bo_size(robj);
- return size;
-}
-
-bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
-{
- if (!adev->mode_info.rfbdev)
- return false;
- if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]))
- return true;
- return false;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
new file mode 100644
index 000000000000..b349bb3676d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+/* Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: David Nieto
+ * Roy Sun
+ */
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ctx.h"
+#include "amdgpu_fdinfo.h"
+
+
+static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = "gfx",
+ [AMDGPU_HW_IP_COMPUTE] = "compute",
+ [AMDGPU_HW_IP_DMA] = "dma",
+ [AMDGPU_HW_IP_UVD] = "dec",
+ [AMDGPU_HW_IP_VCE] = "enc",
+ [AMDGPU_HW_IP_UVD_ENC] = "enc_1",
+ [AMDGPU_HW_IP_VCN_DEC] = "dec",
+ [AMDGPU_HW_IP_VCN_ENC] = "enc",
+ [AMDGPU_HW_IP_VCN_JPEG] = "jpeg",
+ [AMDGPU_HW_IP_VPE] = "vpe",
+};
+
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+ struct amdgpu_fpriv *fpriv = file->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+ ktime_t usage[AMDGPU_HW_IP_NUM];
+ const char *pl_name[] = {
+ [TTM_PL_VRAM] = "vram",
+ [TTM_PL_TT] = "gtt",
+ [TTM_PL_SYSTEM] = "cpu",
+ [AMDGPU_PL_GDS] = "gds",
+ [AMDGPU_PL_GWS] = "gws",
+ [AMDGPU_PL_OA] = "oa",
+ [AMDGPU_PL_DOORBELL] = "doorbell",
+ [AMDGPU_PL_MMIO_REMAP] = "mmioremap",
+ };
+ unsigned int hw_ip, i;
+
+ amdgpu_vm_get_memory(vm, stats);
+ amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
+
+ /*
+ * ******************************************************************
+ * For text output format description please see drm-usage-stats.rst!
+ * ******************************************************************
+ */
+
+ drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
+
+ for (i = 0; i < ARRAY_SIZE(pl_name); i++) {
+ if (!pl_name[i])
+ continue;
+
+ drm_print_memory_stats(p,
+ &stats[i].drm,
+ DRM_GEM_OBJECT_RESIDENT |
+ DRM_GEM_OBJECT_PURGEABLE,
+ pl_name[i]);
+ }
+
+ /* Legacy amdgpu keys, alias to drm-resident-memory-: */
+ drm_printf(p, "drm-memory-vram:\t%llu KiB\n",
+ stats[TTM_PL_VRAM].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-gtt: \t%llu KiB\n",
+ stats[TTM_PL_TT].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-cpu: \t%llu KiB\n",
+ stats[TTM_PL_SYSTEM].drm.resident/1024UL);
+
+ /* Amdgpu specific memory accounting keys: */
+ drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
+ stats[TTM_PL_VRAM].evicted/1024UL);
+ drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
+ (stats[TTM_PL_VRAM].drm.shared +
+ stats[TTM_PL_VRAM].drm.private) / 1024UL);
+ drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
+ (stats[TTM_PL_TT].drm.shared +
+ stats[TTM_PL_TT].drm.private) / 1024UL);
+
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ if (!usage[hw_ip])
+ continue;
+
+ drm_printf(p, "drm-engine-%s:\t%lld ns\n", amdgpu_ip_name[hw_ip],
+ ktime_to_ns(usage[hw_ip]));
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
new file mode 100644
index 000000000000..0398f5a159ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: David Nieto
+ * Roy Sun
+ */
+#ifndef __AMDGPU_SMI_H__
+#define __AMDGPU_SMI_H__
+
+#include <linux/idr.h>
+#include <linux/kfifo.h>
+#include <linux/rbtree.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
+#include <linux/sched/mm.h>
+
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ids.h"
+
+uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id);
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d56f4023ebb3..c7843e336310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -36,45 +36,12 @@
#include <linux/firmware.h>
#include <linux/pm_runtime.h>
-#include <drm/drm_debugfs.h>
-
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
/*
- * Fences
- * Fences mark an event in the GPUs pipeline and are used
- * for GPU/CPU synchronization. When the fence is written,
- * it is expected that all buffers associated with that fence
- * are no longer in use by the associated ring on the GPU and
- * that the the relevant GPU caches have been flushed.
- */
-
-struct amdgpu_fence {
- struct dma_fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
-};
-
-static struct kmem_cache *amdgpu_fence_slab;
-
-int amdgpu_fence_slab_init(void)
-{
- amdgpu_fence_slab = kmem_cache_create(
- "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!amdgpu_fence_slab)
- return -ENOMEM;
- return 0;
-}
-
-void amdgpu_fence_slab_fini(void)
-{
- rcu_barrier();
- kmem_cache_destroy(amdgpu_fence_slab);
-}
-/*
* Cast helper
*/
static const struct dma_fence_ops amdgpu_fence_ops;
@@ -82,10 +49,7 @@ static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
- if (__f->base.ops == &amdgpu_fence_ops)
- return __f;
-
- return NULL;
+ return __f;
}
/**
@@ -129,33 +93,32 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
- * @f: resulting fence object
+ * @af: amdgpu fence input
* @flags: flags to pass into the subordinate .emit_fence() call
*
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
- unsigned flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_fence *fence;
+ struct dma_fence *fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
- fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
- if (fence == NULL)
- return -ENOMEM;
+ fence = &af->base;
+ af->ring = ring;
seq = ++ring->fence_drv.sync_seq;
- fence->ring = ring;
- dma_fence_init(&fence->base, &amdgpu_fence_ops,
+ dma_fence_init(fence, &amdgpu_fence_ops,
&ring->fence_drv.lock,
- adev->fence_context + ring->idx,
- seq);
+ adev->fence_context + ring->idx, seq);
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
+ amdgpu_fence_save_wptr(af);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -173,12 +136,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
}
}
+ to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
/* This function can't be called concurrently anyway, otherwise
* emitting the fence would mess up the hardware ring buffer.
*/
- rcu_assign_pointer(*ptr, dma_fence_get(&fence->base));
-
- *f = &fence->base;
+ rcu_assign_pointer(*ptr, dma_fence_get(fence));
return 0;
}
@@ -247,7 +210,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
struct amdgpu_fence_driver *drv = &ring->fence_drv;
struct amdgpu_device *adev = ring->adev;
uint32_t seq, last_seq;
- int r;
do {
last_seq = atomic_read(&ring->fence_drv.last_seq);
@@ -255,7 +217,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (del_timer(&ring->fence_drv.fallback_timer) &&
+ if (timer_delete(&ring->fence_drv.fallback_timer) &&
seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
@@ -267,6 +229,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
do {
struct dma_fence *fence, **ptr;
+ struct amdgpu_fence *am_fence;
++last_seq;
last_seq &= drv->num_fences_mask;
@@ -279,14 +242,14 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
if (!fence)
continue;
- r = dma_fence_signal(fence);
- if (!r)
- DMA_FENCE_TRACE(fence, "signaled from irq context\n");
- else
- BUG();
-
+ /* Save the wptr in the fence driver so we know what the last processed
+ * wptr was. This is required for re-emitting the ring state for
+ * queues that are reset but are not guilty and thus have no guilty fence.
+ */
+ am_fence = container_of(fence, struct amdgpu_fence, base);
+ drv->signalled_wptr = am_fence->wptr;
+ dma_fence_signal(fence);
dma_fence_put(fence);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
} while (last_seq != seq);
@@ -302,11 +265,13 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
*/
static void amdgpu_fence_fallback(struct timer_list *t)
{
- struct amdgpu_ring *ring = from_timer(ring, t,
- fence_drv.fallback_timer);
+ struct amdgpu_ring *ring = timer_container_of(ring, t,
+ fence_drv.fallback_timer);
if (amdgpu_fence_process(ring))
- DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
+ dev_warn(ring->adev->dev,
+ "Fence fallback timer expired on ring %s\n",
+ ring->name);
}
/**
@@ -354,14 +319,11 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout)
{
- uint32_t seq;
-
- do {
- seq = amdgpu_fence_read(ring);
- udelay(5);
- timeout -= 5;
- } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);
+ while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
+ udelay(2);
+ timeout -= 2;
+ }
return timeout > 0 ? timeout : 0;
}
/**
@@ -373,14 +335,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
* Returns the number of emitted fences on the ring. Used by the
* dynpm code to ring track activity.
*/
-unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
+unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
uint64_t emitted;
/* We are not protected by ring lock when reading the last sequence
* but it's ok to report slightly wrong fence count here.
*/
- amdgpu_fence_process(ring);
emitted = 0x100000000ull;
emitted -= atomic_read(&ring->fence_drv.last_seq);
emitted += READ_ONCE(ring->fence_drv.sync_seq);
@@ -388,6 +349,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
}
/**
+ * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence unsignaled until now, calculate the time delta
+ * between the time fence emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+ uint32_t last_seq, sync_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+ if (last_seq == sync_seq)
+ return 0;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ fence = drv->fences[last_seq];
+ if (!fence)
+ return 0;
+
+ return ktime_us_delta(ktime_get(),
+ to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * The function called at the time the fence and related ib is about to
+ * resubmit to gpu in MCBP scenario. Thus we do not consider race condition
+ * with amdgpu_fence_process to modify the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+
+ seq &= drv->num_fences_mask;
+ fence = drv->fences[seq];
+ if (!fence)
+ return;
+
+ to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
+/**
* amdgpu_fence_driver_start_ring - make the fence driver
* ready for use on the requested ring.
*
@@ -402,14 +414,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
*/
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
- unsigned irq_type)
+ unsigned int irq_type)
{
struct amdgpu_device *adev = ring->adev;
uint64_t index;
if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
- ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
- ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
+ ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
+ ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
} else {
/* put fence directly behind firmware */
index = ALIGN(adev->uvd.fw->size, 8);
@@ -418,9 +430,6 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
}
amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
- if (irq_src)
- amdgpu_irq_get(adev, irq_src, irq_type);
-
ring->fence_drv.irq_src = irq_src;
ring->fence_drv.irq_type = irq_type;
ring->fence_drv.initialized = true;
@@ -435,22 +444,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
- * @num_hw_submission: number of entries on the hardware queue
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
- unsigned num_hw_submission)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- long timeout;
- int r;
if (!adev)
return -EINVAL;
- if (!is_power_of_2(num_hw_submission))
+ if (!is_power_of_2(ring->num_hw_submission))
return -EINVAL;
ring->fence_drv.cpu_addr = NULL;
@@ -461,45 +466,19 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
- ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
+ ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
spin_lock_init(&ring->fence_drv.lock);
- ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
+ ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
GFP_KERNEL);
+
if (!ring->fence_drv.fences)
return -ENOMEM;
- /* No need to setup the GPU scheduler for rings that don't need it */
- if (!ring->no_scheduler) {
- switch (ring->funcs->type) {
- case AMDGPU_RING_TYPE_GFX:
- timeout = adev->gfx_timeout;
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- timeout = adev->compute_timeout;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- timeout = adev->sdma_timeout;
- break;
- default:
- timeout = adev->video_timeout;
- break;
- }
-
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
- num_hw_submission, amdgpu_job_hang_limit,
- timeout, ring->name);
- if (r) {
- DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
- return r;
- }
- }
-
return 0;
}
/**
- * amdgpu_fence_driver_init - init the fence driver
+ * amdgpu_fence_driver_sw_init - init the fence driver
* for all possible rings.
*
* @adev: amdgpu device pointer
@@ -510,108 +489,184 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
* amdgpu_fence_driver_start_ring().
* Returns 0 for success.
*/
-int amdgpu_fence_driver_init(struct amdgpu_device *adev)
+int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
{
return 0;
}
/**
- * amdgpu_fence_driver_fini - tear down the fence driver
+ * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
+ * fence driver interrupts need to be restored.
+ *
+ * @ring: ring that to be checked
+ *
+ * Interrupts for rings that belong to GFX IP don't need to be restored
+ * when the target power state is s0ix.
+ *
+ * Return true if need to restore interrupts, false otherwise.
+ */
+static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_gfx_power_domain = false;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_SDMA:
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0))
+ is_gfx_power_domain = true;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ case AMDGPU_RING_TYPE_MES:
+ is_gfx_power_domain = true;
+ break;
+ default:
+ break;
+ }
+
+ return !(adev->in_s0ix && is_gfx_power_domain);
+}
+
+/**
+ * amdgpu_fence_driver_hw_fini - tear down the fence driver
* for all possible rings.
*
* @adev: amdgpu device pointer
*
* Tear down the fence driver for all possible rings (all asics).
*/
-void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
+void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
{
- unsigned i, j;
- int r;
+ int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized)
continue;
- r = amdgpu_fence_wait_empty(ring);
- if (r) {
- /* no need to trigger GPU reset as we are unloading */
+
+ /* You can't wait for HW to signal if it's gone */
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)))
+ r = amdgpu_fence_wait_empty(ring);
+ else
+ r = -ENODEV;
+ /* no need to trigger GPU reset as we are unloading */
+ if (r)
amdgpu_fence_driver_force_completion(ring);
- }
- if (ring->fence_drv.irq_src)
+
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
+ ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
- if (!ring->no_scheduler)
- drm_sched_fini(&ring->sched);
- del_timer_sync(&ring->fence_drv.fallback_timer);
- for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
- dma_fence_put(ring->fence_drv.fences[j]);
- kfree(ring->fence_drv.fences);
- ring->fence_drv.fences = NULL;
- ring->fence_drv.initialized = false;
+
+ timer_delete_sync(&ring->fence_drv.fallback_timer);
}
}
-/**
- * amdgpu_fence_driver_suspend - suspend the fence driver
- * for all possible rings.
- *
- * @adev: amdgpu device pointer
- *
- * Suspend the fence driver for all possible rings (all asics).
- */
-void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
+/* Will either stop and flush handlers for amdgpu interrupt or reanble it */
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
{
- int i, r;
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
+ continue;
+
+ if (stop)
+ disable_irq(adev->irq.irq);
+ else
+ enable_irq(adev->irq.irq);
+ }
+}
+
+void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
+{
+ unsigned int i, j;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
- /* wait for gpu to finish processing current batch */
- r = amdgpu_fence_wait_empty(ring);
- if (r) {
- /* delay GPU reset to resume */
- amdgpu_fence_driver_force_completion(ring);
- }
+ /*
+ * Notice we check for sched.ops since there's some
+ * override on the meaning of sched.ready by amdgpu.
+ * The natural check would be sched.ready, which is
+ * set as drm_sched_init() finishes...
+ */
+ if (ring->sched.ops)
+ drm_sched_fini(&ring->sched);
- /* disable the interrupt */
- if (ring->fence_drv.irq_src)
- amdgpu_irq_put(adev, ring->fence_drv.irq_src,
- ring->fence_drv.irq_type);
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+ dma_fence_put(ring->fence_drv.fences[j]);
+ kfree(ring->fence_drv.fences);
+ ring->fence_drv.fences = NULL;
+ ring->fence_drv.initialized = false;
}
}
/**
- * amdgpu_fence_driver_resume - resume the fence driver
+ * amdgpu_fence_driver_hw_init - enable the fence driver
* for all possible rings.
*
* @adev: amdgpu device pointer
*
- * Resume the fence driver for all possible rings (all asics).
+ * Enable the fence driver for all possible rings (all asics).
* Not all asics have all rings, so each asic will only
* start the fence driver on the rings it has using
* amdgpu_fence_driver_start_ring().
* Returns 0 for success.
*/
-void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
+void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
{
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
/* enable the interrupt */
- if (ring->fence_drv.irq_src)
+ if (ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
}
/**
+ * amdgpu_fence_driver_set_error - set error code on fences
+ * @ring: the ring which contains the fences
+ * @error: the error code to set
+ *
+ * Set an error code to all the fences pending on the ring.
+ */
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&drv->lock, flags);
+ for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
+ struct dma_fence *fence;
+
+ fence = rcu_dereference_protected(drv->fences[i],
+ lockdep_is_held(&drv->lock));
+ if (fence && !dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, error);
+ }
+ spin_unlock_irqrestore(&drv->lock, flags);
+}
+
+/**
* amdgpu_fence_driver_force_completion - force signal latest fence of ring
*
* @ring: fence of the ring to signal
@@ -619,10 +674,125 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
*/
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
{
+ amdgpu_fence_driver_set_error(ring, -ECANCELED);
amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
amdgpu_fence_process(ring);
}
+
+/*
+ * Kernel queue reset handling
+ *
+ * The driver can reset individual queues for most engines, but those queues
+ * may contain work from multiple contexts. Resetting the queue will reset
+ * lose all of that state. In order to minimize the collateral damage, the
+ * driver will save the ring contents which are not associated with the guilty
+ * context prior to resetting the queue. After resetting the queue the queue
+ * contents from the other contexts is re-emitted to the rings so that it can
+ * be processed by the engine. To handle this, we save the queue's write
+ * pointer (wptr) in the fences associated with each context. If we get a
+ * queue timeout, we can then use the wptrs from the fences to determine
+ * which data needs to be saved out of the queue's ring buffer.
+ */
+
+/**
+ * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
+ *
+ * @af: fence of the ring to signal
+ *
+ */
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ struct amdgpu_ring *ring = af->ring;
+ unsigned long flags;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+
+ /* mark all fences from the guilty context with an error */
+ spin_lock_irqsave(&ring->fence_drv.lock, flags);
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ if (fence == af)
+ dma_fence_set_error(&fence->base, -ETIME);
+ else if (fence->context == af->context)
+ dma_fence_set_error(&fence->base, -ECANCELED);
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+ spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
+ /* signal the guilty fence */
+ amdgpu_fence_write(ring, (u32)af->base.seqno);
+ amdgpu_fence_process(ring);
+}
+
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
+{
+ af->wptr = af->ring->wptr;
+}
+
+static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
+ u64 start_wptr, u32 end_wptr)
+{
+ unsigned int first_idx = start_wptr & ring->buf_mask;
+ unsigned int last_idx = end_wptr & ring->buf_mask;
+ unsigned int i;
+
+ /* Backup the contents of the ring buffer. */
+ for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+ ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
+}
+
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ u64 wptr;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+ wptr = ring->fence_drv.signalled_wptr;
+ ring->ring_backup_entries_to_copy = 0;
+
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ /* save everything if the ring is not guilty, otherwise
+ * just save the content from other contexts.
+ */
+ if (!guilty_fence || (fence->context != guilty_fence->context))
+ amdgpu_ring_backup_unprocessed_command(ring, wptr,
+ fence->wptr);
+ wptr = fence->wptr;
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+}
+
/*
* Common fence implementation
*/
@@ -634,8 +804,7 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- return (const char *)fence->ring->name;
+ return (const char *)to_amdgpu_fence(f)->ring->name;
}
/**
@@ -648,13 +817,8 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
*/
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- struct amdgpu_ring *ring = fence->ring;
-
- if (!timer_pending(&ring->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(ring);
-
- DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
+ if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
return true;
}
@@ -669,8 +833,9 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
static void amdgpu_fence_free(struct rcu_head *rcu)
{
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- kmem_cache_free(amdgpu_fence_slab, fence);
+
+ /* free fence_slab if it's separated fence*/
+ kfree(to_amdgpu_fence(f));
}
/**
@@ -697,15 +862,14 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
* Fence debugfs
*/
#if defined(CONFIG_DEBUG_FS)
-static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
+static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
{
- struct drm_info_node *node = (struct drm_info_node *)m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = m->private;
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
@@ -746,11 +910,10 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
*
* Manually trigger a gpu reset at the next fence wait.
*/
-static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
+static int gpu_recover_get(void *data, u64 *val)
{
- struct drm_info_node *node = (struct drm_info_node *) m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
int r;
r = pm_runtime_get_sync(dev->dev);
@@ -759,35 +922,55 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
return 0;
}
- seq_printf(m, "gpu recover\n");
- amdgpu_device_gpu_recover(adev, NULL);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
+ flush_work(&adev->reset_work);
+
+ *val = atomic_read(&adev->reset_domain->reset_res);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
}
-static const struct drm_info_list amdgpu_debugfs_fence_list[] = {
- {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
- {"amdgpu_gpu_recover", &amdgpu_debugfs_gpu_recover, 0, NULL}
-};
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
+ "%lld\n");
+
+static void amdgpu_debugfs_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USER;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
-static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = {
- {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
-};
#endif
-int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
+void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
- if (amdgpu_sriov_vf(adev))
- return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov,
- ARRAY_SIZE(amdgpu_debugfs_fence_list_sriov));
- return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list,
- ARRAY_SIZE(amdgpu_debugfs_fence_list));
-#else
- return 0;
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
+ &amdgpu_debugfs_fence_info_fops);
+
+ if (!amdgpu_sriov_vf(adev)) {
+
+ INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
+ debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
+ &amdgpu_debugfs_gpu_recover_fops);
+ }
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index 8f4a8f8d8146..b0082aa7f3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -27,12 +27,13 @@
#include "smu_v11_0_i2c.h"
#include "atom.h"
#include "amdgpu_fru_eeprom.h"
+#include "amdgpu_eeprom.h"
-#define I2C_PRODUCT_INFO_ADDR 0xAC
-#define I2C_PRODUCT_INFO_ADDR_SIZE 0x2
-#define I2C_PRODUCT_INFO_OFFSET 0xC0
+#define FRU_EEPROM_MADDR_6 0x60000
+#define FRU_EEPROM_MADDR_8 0x80000
+#define FRU_EEPROM_MADDR_INV 0xFFFFF
-static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
+static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
{
/* Only server cards have the FRU EEPROM
* TODO: See if we can figure this out dynamically instead of
@@ -40,160 +41,368 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
*/
struct atom_context *atom_ctx = adev->mode_info.atom_context;
- /* VBIOS is of the format ###-DXXXYY-##. For SKU identification,
+ /* The i2c access is blocked on VF
+ * TODO: Need other way to get the info
+ * Also, FRU not valid for APU devices.
+ */
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return false;
+
+ /* The default I2C EEPROM address of the FRU.
+ */
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_8;
+
+ /* VBIOS is of the format ###-DXXXYYYY-##. For SKU identification,
* we can use just the "DXXX" portion. If there were more models, we
* could convert the 3 characters to a hex integer and use a switch
* for ease/speed/readability. For now, 2 string comparisons are
* reasonable and not too expensive
*/
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- /* D161 and D163 are the VG20 server SKUs */
- if (strnstr(atom_ctx->vbios_version, "D161",
- sizeof(atom_ctx->vbios_version)) ||
- strnstr(atom_ctx->vbios_version, "D163",
- sizeof(atom_ctx->vbios_version)))
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2):
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ /* D161 and D163 are the VG20 server SKUs */
+ if (atom_ctx && (strnstr(atom_ctx->vbios_pn, "D161",
+ sizeof(atom_ctx->vbios_pn)) ||
+ strnstr(atom_ctx->vbios_pn, "D163",
+ sizeof(atom_ctx->vbios_pn)))) {
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ } else {
+ return false;
+ }
+ case CHIP_ARCTURUS:
+ default:
+ return false;
+ }
+ case IP_VERSION(11, 0, 7):
+ if (atom_ctx && strnstr(atom_ctx->vbios_pn, "D603",
+ sizeof(atom_ctx->vbios_pn))) {
+ if (strnstr(atom_ctx->vbios_pn, "D603GLXE",
+ sizeof(atom_ctx->vbios_pn))) {
+ return false;
+ }
+
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
return true;
- else
+
+ } else {
return false;
+ }
+ case IP_VERSION(13, 0, 2):
+ /* All Aldebaran SKUs have an FRU */
+ if (atom_ctx && !strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_8;
+ return true;
+ case IP_VERSION(13, 0, 12):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_INV;
+ return true;
default:
return false;
}
}
-static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
- unsigned char *buff)
+int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
{
- int ret, size;
- struct i2c_msg msg = {
- .addr = I2C_PRODUCT_INFO_ADDR,
- .flags = I2C_M_RD,
- .buf = buff,
- };
- buff[0] = 0;
- buff[1] = addrptr;
- msg.len = I2C_PRODUCT_INFO_ADDR_SIZE + 1;
- ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
-
- if (ret < 1) {
- DRM_WARN("FRU: Failed to get size field");
- return ret;
+ struct amdgpu_fru_info *fru_info;
+ unsigned char buf[8], *pia;
+ u32 addr, fru_addr;
+ int size, len;
+ u8 csum;
+
+ if (!is_fru_eeprom_supported(adev, &fru_addr))
+ return 0;
+
+ /* FRU data avaialble, but no direct EEPROM access */
+ if (fru_addr == FRU_EEPROM_MADDR_INV)
+ return 0;
+
+ if (!adev->fru_info) {
+ adev->fru_info = kzalloc(sizeof(*adev->fru_info), GFP_KERNEL);
+ if (!adev->fru_info)
+ return -ENOMEM;
}
- /* The size returned by the i2c requires subtraction of 0xC0 since the
- * size apparently always reports as 0xC0+actual size.
+ fru_info = adev->fru_info;
+ /* For Arcturus-and-later, default value of serial_number is unique_id
+ * so convert it to a 16-digit HEX string for convenience and
+ * backwards-compatibility.
*/
- size = buff[2] - I2C_PRODUCT_INFO_OFFSET;
- /* Add 1 since address field was 1 byte */
- buff[1] = addrptr + 1;
-
- msg.len = I2C_PRODUCT_INFO_ADDR_SIZE + size;
- ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
+ sprintf(fru_info->serial, "%llx", adev->unique_id);
- if (ret < 1) {
- DRM_WARN("FRU: Failed to get data field");
- return ret;
+ /* If algo exists, it means that the i2c_adapter's initialized */
+ if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) {
+ dev_warn(adev->dev,
+ "Cannot access FRU, EEPROM accessor not initialized");
+ return -ENODEV;
}
- return size;
-}
+ /* Read the IPMI Common header */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf,
+ sizeof(buf));
+ if (len != 8) {
+ dev_err(adev->dev, "Couldn't read the IPMI Common Header: %d",
+ len);
+ return len < 0 ? len : -EIO;
+ }
-int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
-{
- unsigned char buff[34];
- int addrptr = 0, size = 0;
+ if (buf[0] != 1) {
+ dev_err(adev->dev, "Bad IPMI Common Header version: 0x%02x",
+ buf[0]);
+ return -EIO;
+ }
- if (!is_fru_eeprom_supported(adev))
- return 0;
+ for (csum = 0; len > 0; len--)
+ csum += buf[len - 1];
+ if (csum) {
+ dev_err(adev->dev, "Bad IPMI Common Header checksum: 0x%02x",
+ csum);
+ return -EIO;
+ }
- /* If algo exists, it means that the i2c_adapter's initialized */
- if (!adev->pm.smu_i2c.algo) {
- DRM_WARN("Cannot access FRU, EEPROM accessor not initialized");
+ /* Get the offset to the Product Info Area (PIA). */
+ addr = buf[4] * 8;
+ if (!addr)
return 0;
- }
- /* There's a lot of repetition here. This is due to the FRU having
- * variable-length fields. To get the information, we have to find the
- * size of each field, and then keep reading along and reading along
- * until we get all of the data that we want. We use addrptr to track
- * the address as we go
- */
+ /* Get the absolute address to the PIA. */
+ addr += fru_addr;
- /* The first fields are all of size 1-byte, from 0-7 are offsets that
- * contain information that isn't useful to us.
- * Bytes 8-a are all 1-byte and refer to the size of the entire struct,
- * and the language field, so just start from 0xb, manufacturer size
- */
- addrptr = 0xb;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
- if (size < 1) {
- DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size);
- return size;
+ /* Read the header of the PIA. */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3);
+ if (len != 3) {
+ dev_err(adev->dev,
+ "Couldn't read the Product Info Area header: %d", len);
+ return len < 0 ? len : -EIO;
}
- /* Increment the addrptr by the size of the field, and 1 due to the
- * size field being 1 byte. This pattern continues below.
- */
- addrptr += size + 1;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
- if (size < 1) {
- DRM_ERROR("Failed to read FRU product name, ret:%d", size);
- return size;
+ if (buf[0] != 1) {
+ dev_err(adev->dev, "Bad IPMI Product Info Area version: 0x%02x",
+ buf[0]);
+ return -EIO;
}
- /* Product name should only be 32 characters. Any more,
- * and something could be wrong. Cap it at 32 to be safe
- */
- if (size > 32) {
- DRM_WARN("FRU Product Number is larger than 32 characters. This is likely a mistake");
- size = 32;
+ size = buf[1] * 8;
+ pia = kzalloc(size, GFP_KERNEL);
+ if (!pia)
+ return -ENOMEM;
+
+ /* Read the whole PIA. */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size);
+ if (len != size) {
+ kfree(pia);
+ dev_err(adev->dev, "Couldn't read the Product Info Area: %d",
+ len);
+ return len < 0 ? len : -EIO;
}
- /* Start at 2 due to buff using fields 0 and 1 for the address */
- memcpy(adev->product_name, &buff[2], size);
- adev->product_name[size] = '\0';
-
- addrptr += size + 1;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
- if (size < 1) {
- DRM_ERROR("Failed to read FRU product number, ret:%d", size);
- return size;
+
+ for (csum = 0; size > 0; size--)
+ csum += pia[size - 1];
+ if (csum) {
+ dev_err(adev->dev, "Bad Product Info Area checksum: 0x%02x",
+ csum);
+ kfree(pia);
+ return -EIO;
}
- /* Product number should only be 16 characters. Any more,
- * and something could be wrong. Cap it at 16 to be safe
+ /* Now extract useful information from the PIA.
+ *
+ * Read Manufacturer Name field whose length is [3].
*/
- if (size > 16) {
- DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake");
- size = 16;
- }
- memcpy(adev->product_number, &buff[2], size);
- adev->product_number[size] = '\0';
+ addr = 3;
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->manufacturer_name, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->manufacturer_name),
+ pia[addr] & 0x3F));
+ fru_info->manufacturer_name[sizeof(fru_info->manufacturer_name) - 1] =
+ '\0';
- addrptr += size + 1;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
+ /* Read Product Name field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->product_name, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->product_name), pia[addr] & 0x3F));
+ fru_info->product_name[sizeof(fru_info->product_name) - 1] = '\0';
- if (size < 1) {
- DRM_ERROR("Failed to read FRU product version, ret:%d", size);
- return size;
- }
+ /* Go to the Product Part/Model Number field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->product_number, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->product_number),
+ pia[addr] & 0x3F));
+ fru_info->product_number[sizeof(fru_info->product_number) - 1] = '\0';
- addrptr += size + 1;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
+ /* Go to the Product Version field. */
+ addr += 1 + (pia[addr] & 0x3F);
- if (size < 1) {
- DRM_ERROR("Failed to read FRU serial number, ret:%d", size);
- return size;
- }
+ /* Go to the Product Serial Number field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->serial, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->serial), pia[addr] & 0x3F));
+ fru_info->serial[sizeof(fru_info->serial) - 1] = '\0';
- /* Serial number should only be 16 characters. Any more,
- * and something could be wrong. Cap it at 16 to be safe
- */
- if (size > 16) {
- DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake");
- size = 16;
- }
- memcpy(adev->serial, &buff[2], size);
- adev->serial[size] = '\0';
+ /* Asset Tag field */
+ addr += 1 + (pia[addr] & 0x3F);
+
+ /* FRU File Id field. This could be 'null'. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if ((addr + 1 >= len) || !(pia[addr] & 0x3F))
+ goto Out;
+ memcpy(fru_info->fru_id, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->fru_id), pia[addr] & 0x3F));
+ fru_info->fru_id[sizeof(fru_info->fru_id) - 1] = '\0';
+Out:
+ kfree(pia);
return 0;
}
+
+/**
+ * DOC: product_name
+ *
+ * The amdgpu driver provides a sysfs API for reporting the product name
+ * for the device
+ * The file product_name is used for this and returns the product name
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_product_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->product_name);
+}
+
+static DEVICE_ATTR(product_name, 0444, amdgpu_fru_product_name_show, NULL);
+
+/**
+ * DOC: product_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the part number
+ * for the device
+ * The file product_number is used for this and returns the part number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_product_number_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->product_number);
+}
+
+static DEVICE_ATTR(product_number, 0444, amdgpu_fru_product_number_show, NULL);
+
+/**
+ * DOC: serial_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the serial number
+ * for the device
+ * The file serial_number is used for this and returns the serial number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_serial_number_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->serial);
+}
+
+static DEVICE_ATTR(serial_number, 0444, amdgpu_fru_serial_number_show, NULL);
+
+/**
+ * DOC: fru_id
+ *
+ * The amdgpu driver provides a sysfs API for reporting FRU File Id
+ * for the device.
+ * The file fru_id is used for this and returns the File Id value
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->fru_id);
+}
+
+static DEVICE_ATTR(fru_id, 0444, amdgpu_fru_id_show, NULL);
+
+/**
+ * DOC: manufacturer
+ *
+ * The amdgpu driver provides a sysfs API for reporting manufacturer name from
+ * FRU information.
+ * The file manufacturer returns the value as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_manufacturer_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->manufacturer_name);
+}
+
+static DEVICE_ATTR(manufacturer, 0444, amdgpu_fru_manufacturer_name_show, NULL);
+
+static const struct attribute *amdgpu_fru_attributes[] = {
+ &dev_attr_product_name.attr,
+ &dev_attr_product_number.attr,
+ &dev_attr_serial_number.attr,
+ &dev_attr_fru_id.attr,
+ &dev_attr_manufacturer.attr,
+ NULL
+};
+
+int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info)
+ return 0;
+
+ return sysfs_create_files(&adev->dev->kobj, amdgpu_fru_attributes);
+}
+
+void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->fru_info)
+ return;
+
+ sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
index 1308d976d60e..98f3196599ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -24,6 +24,19 @@
#ifndef __AMDGPU_FRU_EEPROM_H__
#define __AMDGPU_FRU_EEPROM_H__
+#define AMDGPU_PRODUCT_NAME_LEN 64
+
+/* FRU product information */
+struct amdgpu_fru_info {
+ char product_number[20];
+ char product_name[AMDGPU_PRODUCT_NAME_LEN];
+ char serial[20];
+ char manufacturer_name[32];
+ char fru_id[50];
+};
+
int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
+int amdgpu_fru_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev);
#endif // __AMDGPU_FRU_EEPROM_H__
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
index 8d1ad294cb02..328a1b963548 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -32,17 +32,15 @@
#include "soc15_common.h"
#define FW_ATTESTATION_DB_COOKIE 0x143b6a37
-#define FW_ATTESTATION_RECORD_VALID 1
+#define FW_ATTESTATION_RECORD_VALID 1
#define FW_ATTESTATION_MAX_SIZE 4096
-typedef struct FW_ATT_DB_HEADER
-{
+struct FW_ATT_DB_HEADER {
uint32_t AttDbVersion; /* version of the fwar feature */
uint32_t AttDbCookie; /* cookie as an extra check for corrupt data */
-} FW_ATT_DB_HEADER;
+};
-typedef struct FW_ATT_RECORD
-{
+struct FW_ATT_RECORD {
uint16_t AttFwIdV1; /* Legacy FW Type field */
uint16_t AttFwIdV2; /* V2 FW ID field */
uint32_t AttFWVersion; /* FW Version */
@@ -50,7 +48,7 @@ typedef struct FW_ATT_RECORD
uint8_t AttSource; /* FW source indicator */
uint8_t RecordValid; /* Indicates whether the record is a valid entry */
uint32_t AttFwTaId; /* Ta ID (only in TA Attestation Table) */
-} FW_ATT_RECORD;
+};
static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
char __user *buf,
@@ -60,15 +58,15 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
uint64_t records_addr = 0;
uint64_t vram_pos = 0;
- FW_ATT_DB_HEADER fw_att_hdr = {0};
- FW_ATT_RECORD fw_att_record = {0};
+ struct FW_ATT_DB_HEADER fw_att_hdr = {0};
+ struct FW_ATT_RECORD fw_att_record = {0};
- if (size < sizeof(FW_ATT_RECORD)) {
+ if (size < sizeof(struct FW_ATT_RECORD)) {
DRM_WARN("FW attestation input buffer not enough memory");
return -EINVAL;
}
- if ((*pos + sizeof(FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
+ if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
DRM_WARN("FW attestation out of bounds");
return 0;
}
@@ -83,8 +81,8 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
if (*pos == 0) {
amdgpu_device_vram_access(adev,
vram_pos,
- (uint32_t*)&fw_att_hdr,
- sizeof(FW_ATT_DB_HEADER),
+ (uint32_t *)&fw_att_hdr,
+ sizeof(struct FW_ATT_DB_HEADER),
false);
if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) {
@@ -96,20 +94,20 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
}
amdgpu_device_vram_access(adev,
- vram_pos + sizeof(FW_ATT_DB_HEADER) + *pos,
- (uint32_t*)&fw_att_record,
- sizeof(FW_ATT_RECORD),
+ vram_pos + sizeof(struct FW_ATT_DB_HEADER) + *pos,
+ (uint32_t *)&fw_att_record,
+ sizeof(struct FW_ATT_RECORD),
false);
if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID)
return 0;
- if (copy_to_user(buf, (void*)&fw_att_record, sizeof(FW_ATT_RECORD)))
+ if (copy_to_user(buf, (void *)&fw_att_record, sizeof(struct FW_ATT_RECORD)))
return -EINVAL;
- *pos += sizeof(FW_ATT_RECORD);
+ *pos += sizeof(struct FW_ATT_RECORD);
- return sizeof(FW_ATT_RECORD);
+ return sizeof(struct FW_ATT_RECORD);
}
static const struct file_operations amdgpu_fw_attestation_debugfs_ops = {
@@ -121,6 +119,13 @@ static const struct file_operations amdgpu_fw_attestation_debugfs_ops = {
static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev)
{
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3))
+ return 0;
+
if (adev->asic_type >= CHIP_SIENNA_CICHLID)
return 1;
@@ -133,7 +138,7 @@ void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev)
return;
debugfs_create_file("amdgpu_fw_attestation",
- S_IRUSR,
+ 0400,
adev_to_drm(adev)->primary->debugfs_root,
adev,
&amdgpu_fw_attestation_debugfs_ops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 0db933026722..d2237ce9da70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,6 +34,9 @@
#include <asm/set_memory.h>
#endif
#include "amdgpu.h"
+#include "amdgpu_reset.h"
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
/*
* GART
@@ -60,7 +63,7 @@
*/
/**
- * amdgpu_dummy_page_init - init dummy page used by the driver
+ * amdgpu_gart_dummy_page_init - init dummy page used by the driver
*
* @adev: amdgpu_device pointer
*
@@ -71,12 +74,13 @@
*/
static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
{
- struct page *dummy_page = ttm_bo_glob.dummy_read_page;
+ struct page *dummy_page = ttm_glob.dummy_read_page;
if (adev->dummy_page_addr)
return 0;
- adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ adev->dummy_page_addr = dma_map_page_attrs(&adev->pdev->dev, dummy_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {
dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
adev->dummy_page_addr = 0;
@@ -86,105 +90,184 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
}
/**
- * amdgpu_dummy_page_fini - free dummy page used by the driver
+ * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
*
* @adev: amdgpu_device pointer
*
* Frees the dummy page used by the driver (all asics).
*/
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
{
if (!adev->dummy_page_addr)
return;
- pci_unmap_page(adev->pdev, adev->dummy_page_addr,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ dma_unmap_page_attrs(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
adev->dummy_page_addr = 0;
}
/**
- * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
+ * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
*
* @adev: amdgpu_device pointer
*
- * Allocate video memory for GART page table
- * (pcie r4xx, r5xx+). These asics require the
- * gart table to be in video memory.
+ * Allocate system memory for GART page table for ASICs that don't have
+ * dedicated VRAM.
* Returns 0 for success, error for failure.
*/
-int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
{
- int r;
+ unsigned int order = get_order(adev->gart.table_size);
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
+ struct amdgpu_bo *bo = NULL;
+ struct sg_table *sg = NULL;
+ struct amdgpu_bo_param bp;
+ dma_addr_t dma_addr;
+ struct page *p;
+ unsigned long x;
+ int ret;
+
+ if (adev->gart.bo != NULL)
+ return 0;
- if (adev->gart.bo == NULL) {
- struct amdgpu_bo_param bp;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = adev->gart.table_size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
- r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
- if (r) {
- return r;
- }
+ p = alloc_pages(gfp_flags, order);
+ if (!p)
+ return -ENOMEM;
+
+ /* assign pages to this device */
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = adev->mman.bdev.dev_mapping;
+
+ /* If the hardware does not support UTCL2 snooping of the CPU caches
+ * then set_memory_wc() could be used as a workaround to mark the pages
+ * as write combine memory.
+ */
+ dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
+ dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
+ __free_pages(p, order);
+ p = NULL;
+ return -EFAULT;
+ }
+
+ dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
+ /* Create SG table */
+ sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+ if (ret)
+ goto error;
+
+ sg_dma_address(sg->sgl) = dma_addr;
+ sg->sgl->length = adev->gart.table_size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = adev->gart.table_size;
+#endif
+ /* Create SG BO */
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->gart.table_size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.flags = 0;
+ ret = amdgpu_bo_create(adev, &bp, &bo);
+ if (ret)
+ goto error;
+
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
+ goto error;
}
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ WARN(ret, "Pinning the GART table failed");
+ if (ret)
+ goto error_resv;
+
+ adev->gart.bo = bo;
+ adev->gart.ptr = page_to_virt(p);
+ /* Make GART table accessible in VMID0 */
+ ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
+ if (ret)
+ amdgpu_gart_table_ram_free(adev);
+ amdgpu_bo_unreserve(bo);
+
return 0;
+
+error_resv:
+ amdgpu_bo_unreserve(bo);
+error:
+ amdgpu_bo_unref(&bo);
+ if (sg) {
+ sg_free_table(sg);
+ kfree(sg);
+ }
+ __free_pages(p, order);
+ return ret;
}
/**
- * amdgpu_gart_table_vram_pin - pin gart page table in vram
+ * amdgpu_gart_table_ram_free - free gart page table system ram
*
* @adev: amdgpu_device pointer
*
- * Pin the GART page table in vram so it will not be moved
- * by the memory manager (pcie r4xx, r5xx+). These asics require the
- * gart table to be in video memory.
- * Returns 0 for success, error for failure.
+ * Free the system memory used for the GART page tableon ASICs that don't
+ * have dedicated VRAM.
*/
-int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
{
- int r;
-
- r = amdgpu_bo_reserve(adev->gart.bo, false);
- if (unlikely(r != 0))
- return r;
- r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
- if (r) {
+ unsigned int order = get_order(adev->gart.table_size);
+ struct sg_table *sg = adev->gart.bo->tbo.sg;
+ struct page *p;
+ unsigned long x;
+ int ret;
+
+ ret = amdgpu_bo_reserve(adev->gart.bo, false);
+ if (!ret) {
+ amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.bo);
- return r;
}
- r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
- if (r)
- amdgpu_bo_unpin(adev->gart.bo);
- amdgpu_bo_unreserve(adev->gart.bo);
- return r;
+ amdgpu_bo_unref(&adev->gart.bo);
+ sg_free_table(sg);
+ kfree(sg);
+ p = virt_to_page(adev->gart.ptr);
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = NULL;
+ __free_pages(p, order);
+
+ adev->gart.ptr = NULL;
}
/**
- * amdgpu_gart_table_vram_unpin - unpin gart page table in vram
+ * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
*
* @adev: amdgpu_device pointer
*
- * Unpin the GART page table in vram (pcie r4xx, r5xx+).
- * These asics require the gart table to be in video memory.
+ * Allocate video memory for GART page table
+ * (pcie r4xx, r5xx+). These asics require the
+ * gart table to be in video memory.
+ * Returns 0 for success, error for failure.
*/
-void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
+int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
- int r;
+ if (adev->gart.bo != NULL)
+ return 0;
- if (adev->gart.bo == NULL) {
- return;
- }
- r = amdgpu_bo_reserve(adev->gart.bo, true);
- if (likely(r == 0)) {
- amdgpu_bo_kunmap(adev->gart.bo);
- amdgpu_bo_unpin(adev->gart.bo);
- amdgpu_bo_unreserve(adev->gart.bo);
- adev->gart.ptr = NULL;
- }
+ return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
+ NULL, (void *)&adev->gart.ptr);
}
/**
@@ -198,10 +281,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
*/
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
- if (adev->gart.bo == NULL) {
- return;
- }
- amdgpu_bo_unref(&adev->gart.bo);
+ amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
}
/*
@@ -218,27 +298,24 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
* replaces them with the dummy page (all asics).
* Returns 0 for success, -EINVAL for failure.
*/
-int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages)
{
unsigned t;
- unsigned p;
int i, j;
u64 page_base;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
uint64_t flags = 0;
+ int idx;
- if (!adev->gart.ready) {
- WARN(1, "trying to unbind memory from uninitialized GART !\n");
- return -EINVAL;
- }
+ if (!adev->gart.ptr)
+ return;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- for (i = 0; i < pages; i++, p++) {
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
- adev->gart.pages[p] = NULL;
-#endif
+ for (i = 0; i < pages; i++) {
page_base = adev->dummy_page_addr;
if (!adev->gart.ptr)
continue;
@@ -249,12 +326,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
- mb();
- amdgpu_asic_flush_hdp(adev, NULL);
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+ amdgpu_gart_invalidate_tlb(adev);
- return 0;
+ drm_dev_exit(idx);
}
/**
@@ -270,17 +344,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
* Map the dma_addresses into GART entries (all asics).
* Returns 0 for success, -EINVAL for failure.
*/
-int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
int pages, dma_addr_t *dma_addr, uint64_t flags,
void *dst)
{
uint64_t page_base;
unsigned i, j, t;
+ int idx;
- if (!adev->gart.ready) {
- WARN(1, "trying to bind memory to uninitialized GART !\n");
- return -EINVAL;
- }
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
t = offset / AMDGPU_GPU_PAGE_SIZE;
@@ -291,7 +364,43 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
- return 0;
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gart_map_vram_range - map VRAM pages into the GART page table
+ *
+ * @adev: amdgpu_device pointer
+ * @pa: physical address of the first page to be mapped
+ * @start_page: first page to map in the GART aperture
+ * @num_pages: number of pages to be mapped
+ * @flags: page table entry flags
+ * @dst: CPU address of the GART table
+ *
+ * Binds a BO that is allocated in VRAM to the GART page table
+ * (all ASICs).
+ *
+ * Useful when a kernel BO is located in VRAM but
+ * needs to be accessed from the GART address space.
+ */
+void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
+ uint64_t start_page, uint64_t num_pages,
+ uint64_t flags, void *dst)
+{
+ u32 i, idx;
+
+ /* The SYSTEM flag indicates the pages aren't in VRAM. */
+ WARN_ON_ONCE(flags & AMDGPU_PTE_SYSTEM);
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ for (i = 0; i < num_pages; ++i) {
+ amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+ start_page + i, pa + AMDGPU_GPU_PAGE_SIZE * i, flags);
+ }
+
+ drm_dev_exit(idx);
}
/**
@@ -300,7 +409,6 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
* @adev: amdgpu_device pointer
* @offset: offset into the GPU's gart aperture
* @pages: number of pages to bind
- * @pagelist: pages to bind
* @dma_addr: DMA addresses of pages
* @flags: page table entry flags
*
@@ -308,40 +416,38 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
* (all asics).
* Returns 0 for success, -EINVAL for failure.
*/
-int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
- int pages, struct page **pagelist, dma_addr_t *dma_addr,
+void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr,
uint64_t flags)
{
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
- unsigned t,p;
-#endif
- int r, i;
+ if (!adev->gart.ptr)
+ return;
- if (!adev->gart.ready) {
- WARN(1, "trying to bind memory to uninitialized GART !\n");
- return -EINVAL;
- }
+ amdgpu_gart_map(adev, offset, pages, dma_addr, flags, adev->gart.ptr);
+}
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
- t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- for (i = 0; i < pages; i++, p++)
- adev->gart.pages[p] = pagelist ? pagelist[i] : NULL;
-#endif
+/**
+ * amdgpu_gart_invalidate_tlb - invalidate gart TLB
+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Invalidate gart TLB which can be use as a way to flush gart changes
+ *
+ */
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
+{
+ int i;
if (!adev->gart.ptr)
- return 0;
-
- r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
- adev->gart.ptr);
- if (r)
- return r;
+ return;
mb();
- amdgpu_asic_flush_hdp(adev, NULL);
- for (i = 0; i < adev->num_vmhubs; i++)
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_device_flush_hdp(adev, NULL);
+ up_read(&adev->reset_domain->sem);
+ }
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
- return 0;
}
/**
@@ -373,29 +479,5 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
- /* Allocate pages table */
- adev->gart.pages = vzalloc(array_size(sizeof(void *),
- adev->gart.num_cpu_pages));
- if (adev->gart.pages == NULL)
- return -ENOMEM;
-#endif
-
return 0;
}
-
-/**
- * amdgpu_gart_fini - tear down the driver info for managing the gart
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down the gart driver info and free the dummy page (all asics).
- */
-void amdgpu_gart_fini(struct amdgpu_device *adev)
-{
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
- vfree(adev->gart.pages);
- adev->gart.pages = NULL;
-#endif
- amdgpu_gart_dummy_page_fini(adev);
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index afa2e2877d87..d3118275ddae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -46,28 +46,26 @@ struct amdgpu_gart {
unsigned num_gpu_pages;
unsigned num_cpu_pages;
unsigned table_size;
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
- struct page **pages;
-#endif
- bool ready;
/* Asic default pte flags */
uint64_t gart_pte_flags;
};
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
-int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
-void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
int amdgpu_gart_init(struct amdgpu_device *adev);
-void amdgpu_gart_fini(struct amdgpu_device *adev);
-int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
- int pages);
-int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
- int pages, dma_addr_t *dma_addr, uint64_t flags,
- void *dst);
-int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
- int pages, struct page **pagelist,
- dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+ int pages);
+void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags,
+ void *dst);
+void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags);
+void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
+ uint64_t start_page, uint64_t num_pages,
+ uint64_t flags, void *dst);
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index b443907afcea..3e38c5db2987 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -32,38 +32,189 @@
#include <linux/dma-buf.h>
#include <drm/amdgpu_drm.h>
-#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_exec.h>
#include <drm/drm_gem_ttm_helper.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_dma_buf.h"
+#include "amdgpu_hmm.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
-static const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+static int
+amdgpu_gem_add_input_fence(struct drm_file *filp,
+ uint64_t syncobj_handles_array,
+ uint32_t num_syncobj_handles)
+{
+ struct dma_fence *fence;
+ uint32_t *syncobj_handles;
+ int ret, i;
-static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
+ if (!num_syncobj_handles)
+ return 0;
+
+ syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array),
+ size_mul(sizeof(uint32_t), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ for (i = 0; i < num_syncobj_handles; i++) {
+
+ if (!syncobj_handles[i]) {
+ ret = -EINVAL;
+ goto free_memdup;
+ }
+
+ ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
+ if (ret)
+ goto free_memdup;
+
+ dma_fence_wait(fence, false);
+
+ /* TODO: optimize async handling */
+ dma_fence_put(fence);
+ }
+
+free_memdup:
+ kfree(syncobj_handles);
+ return ret;
+}
+
+static int
+amdgpu_gem_update_timeline_node(struct drm_file *filp,
+ uint32_t syncobj_handle,
+ uint64_t point,
+ struct drm_syncobj **syncobj,
+ struct dma_fence_chain **chain)
+{
+ if (!syncobj_handle)
+ return 0;
+
+ /* Find the sync object */
+ *syncobj = drm_syncobj_find(filp, syncobj_handle);
+ if (!*syncobj)
+ return -ENOENT;
+
+ if (!point)
+ return 0;
+
+ /* Allocate the chain node */
+ *chain = dma_fence_chain_alloc();
+ if (!*chain) {
+ drm_syncobj_put(*syncobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+amdgpu_gem_update_bo_mapping(struct drm_file *filp,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation,
+ uint64_t point,
+ struct dma_fence *fence,
+ struct drm_syncobj *syncobj,
+ struct dma_fence_chain *chain)
+{
+ struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct dma_fence *last_update;
+
+ if (!syncobj)
+ return;
+
+ /* Find the last update fence */
+ switch (operation) {
+ case AMDGPU_VA_OP_MAP:
+ case AMDGPU_VA_OP_REPLACE:
+ if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
+ last_update = vm->last_update;
+ else
+ last_update = bo_va->last_pt_update;
+ break;
+ case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ last_update = fence;
+ break;
+ default:
+ return;
+ }
+
+ /* Add fence to timeline */
+ if (!point)
+ drm_syncobj_replace_fence(syncobj, last_update);
+ else
+ drm_syncobj_add_point(syncobj, chain, last_update, point);
+}
+
+static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
{
- struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
+ struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
+ struct drm_device *ddev = bo->base.dev;
+ vm_fault_t ret;
+ int idx;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ if (drm_dev_enter(ddev, &idx)) {
+ ret = amdgpu_bo_fault_reserve_notify(bo);
+ if (ret) {
+ drm_dev_exit(idx);
+ goto unlock;
+ }
+
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
- if (robj) {
- amdgpu_mn_unregister(robj);
- amdgpu_bo_unref(&robj);
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
}
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+unlock:
+ dma_resv_unlock(bo->base.resv);
+ return ret;
+}
+
+static const struct vm_operations_struct amdgpu_gem_vm_ops = {
+ .fault = amdgpu_gem_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close,
+ .access = ttm_bo_vm_access
+};
+
+static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
+{
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
+
+ amdgpu_hmm_unregister(aobj);
+ ttm_bo_fini(&aobj->tbo);
}
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct dma_resv *resv,
- struct drm_gem_object **obj)
+ struct drm_gem_object **obj, int8_t xcp_id_plus1)
{
struct amdgpu_bo *bo;
+ struct amdgpu_bo_user *ubo;
struct amdgpu_bo_param bp;
int r;
memset(&bp, 0, sizeof(bp));
*obj = NULL;
+ flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
bp.size = size;
bp.byte_align = alignment;
@@ -72,12 +223,15 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
bp.preferred_domain = initial_domain;
bp.flags = flags;
bp.domain = initial_domain;
- r = amdgpu_bo_create(adev, &bp, &bo);
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.xcp_id_plus1 = xcp_id_plus1;
+
+ r = amdgpu_bo_create_user(adev, &bp, &ubo);
if (r)
return r;
+ bo = &ubo->bo;
*obj = &bo->tbo.base;
- (*obj)->funcs = &amdgpu_gem_object_funcs;
return 0;
}
@@ -126,21 +280,63 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
return -EPERM;
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
- abo->tbo.base.resv != vm->root.base.bo->tbo.base.resv)
+ !amdgpu_vm_is_bo_always_valid(vm, abo))
return -EPERM;
r = amdgpu_bo_reserve(abo, false);
if (r)
return r;
+ amdgpu_vm_bo_update_shared(abo);
bo_va = amdgpu_vm_bo_find(vm, abo);
- if (!bo_va) {
+ if (!bo_va)
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
- } else {
+ else
++bo_va->ref_count;
+
+ /* attach gfx eviction fence */
+ r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
+ if (r) {
+ DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
+ amdgpu_bo_unreserve(abo);
+ return r;
}
+
amdgpu_bo_unreserve(abo);
- return 0;
+
+ /* Validate and add eviction fence to DMABuf imports with dynamic
+ * attachment in compute VMs. Re-validation will be done by
+ * amdgpu_vm_validate. Fences are on the reservation shared with the
+ * export, which is currently required to be validated and fenced
+ * already by amdgpu_amdkfd_gpuvm_restore_process_bos.
+ *
+ * Nested locking below for the case that a GEM object is opened in
+ * kfd_mem_export_dmabuf. Since the lock below is only taken for imports,
+ * but not for export, this is a different lock class that cannot lead to
+ * circular lock dependencies.
+ */
+ if (!vm->is_compute_context || !vm->process_info)
+ return 0;
+ if (!drm_gem_is_imported(obj) ||
+ !dma_buf_is_dynamic(obj->import_attach->dmabuf))
+ return 0;
+ mutex_lock_nested(&vm->process_info->lock, 1);
+ if (!WARN_ON(!vm->process_info->eviction_fence)) {
+ r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT,
+ &vm->process_info->eviction_fence->base);
+ if (r) {
+ struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);
+
+ dev_warn(adev->dev, "validate_and_fence failed: %d\n", r);
+ if (ti) {
+ dev_warn(adev->dev, "pid %d\n", ti->task.pid);
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+ }
+ mutex_unlock(&vm->process_info->lock);
+
+ return r;
}
static void amdgpu_gem_object_close(struct drm_gem_object *obj,
@@ -151,44 +347,40 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_bo_list_entry vm_pd;
- struct list_head list, duplicates;
struct dma_fence *fence = NULL;
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+ struct drm_exec exec;
long r;
- INIT_LIST_HEAD(&list);
- INIT_LIST_HEAD(&duplicates);
-
- tv.bo = &bo->tbo;
- tv.num_shared = 2;
- list_add(&tv.head, &list);
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+ }
- amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
+ amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
- r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
- if (r) {
- dev_err(adev->dev, "leaking bo va because "
- "we fail to reserve bo (%ld)\n", r);
- return;
- }
bo_va = amdgpu_vm_bo_find(vm, bo);
if (!bo_va || --bo_va->ref_count)
goto out_unlock;
- amdgpu_vm_bo_rmv(adev, bo_va);
+ amdgpu_vm_bo_del(adev, bo_va);
+ amdgpu_vm_bo_update_shared(bo);
if (!amdgpu_vm_ready(vm))
goto out_unlock;
- fence = dma_resv_get_excl(bo->tbo.base.resv);
- if (fence) {
- amdgpu_bo_fence(bo, fence, true);
- fence = NULL;
- }
-
r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (unlikely(r < 0))
+ dev_err(adev->dev, "failed to clear page "
+ "tables on GEM object close (%ld)\n", r);
if (r || !fence)
goto out_unlock;
@@ -196,19 +388,41 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
dma_fence_put(fence);
out_unlock:
- if (unlikely(r < 0))
- dev_err(adev->dev, "failed to clear page "
- "tables on GEM object close (%ld)\n", r);
- ttm_eu_backoff_reservation(&ticket, &list);
+ if (r)
+ dev_err(adev->dev, "leaking bo va (%ld)\n", r);
+ drm_exec_fini(&exec);
+}
+
+static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+ return -EPERM;
+ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+ return -EPERM;
+
+ /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings
+ * for debugger access to invisible VRAM. Should have used MAP_SHARED
+ * instead. Clearing VM_MAYWRITE prevents the mapping from ever
+ * becoming writable and makes is_cow_mapping(vm_flags) false.
+ */
+ if (is_cow_mapping(vma->vm_flags) &&
+ !(vma->vm_flags & VM_ACCESS_FLAGS))
+ vm_flags_clear(vma, VM_MAYWRITE);
+
+ return drm_gem_ttm_mmap(obj, vma);
}
-static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
+const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
.free = amdgpu_gem_object_free,
.open = amdgpu_gem_object_open,
.close = amdgpu_gem_object_close,
.export = amdgpu_gem_prime_export,
.vmap = drm_gem_ttm_vmap,
.vunmap = drm_gem_ttm_vunmap,
+ .mmap = amdgpu_gem_object_mmap,
+ .vm_ops = &amdgpu_gem_vm_ops,
};
/*
@@ -229,14 +443,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
int r;
/* reject invalid gem flags */
- if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC |
- AMDGPU_GEM_CREATE_VRAM_CLEARED |
- AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
- AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
- AMDGPU_GEM_CREATE_ENCRYPTED))
-
+ if (flags & ~AMDGPU_GEM_CREATE_SETTABLE_MASK)
return -EINVAL;
/* reject invalid gem domains */
@@ -248,6 +455,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
+ /* always clear VRAM */
+ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
+ if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP)
+ return -EINVAL;
+
/* create a gem object to contain this object in */
if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
@@ -262,42 +475,39 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
}
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
- r = amdgpu_bo_reserve(vm->root.base.bo, false);
+ r = amdgpu_bo_reserve(vm->root.bo, false);
if (r)
return r;
- resv = vm->root.base.bo->tbo.base.resv;
+ resv = vm->root.bo->tbo.base.resv;
}
initial_domain = (u32)(0xffffffff & args->in.domains);
retry:
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
initial_domain,
- flags, ttm_bo_type_device, resv, &gobj);
- if (r) {
- if (r != -ERESTARTSYS) {
- if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
- flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- goto retry;
- }
+ flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1);
+ if (r && r != -ERESTARTSYS) {
+ if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+ flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ goto retry;
+ }
- if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
- initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
- goto retry;
- }
- DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
- size, initial_domain, args->in.alignment, r);
+ if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+ goto retry;
}
- return r;
+ DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
+ size, initial_domain, args->in.alignment, r);
}
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
if (!r) {
struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
- abo->parent = amdgpu_bo_ref(vm->root.base.bo);
+ abo->parent = amdgpu_bo_ref(vm->root.bo);
}
- amdgpu_bo_unreserve(vm->root.base.bo);
+ amdgpu_bo_unreserve(vm->root.bo);
}
if (r)
return r;
@@ -319,7 +529,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_userptr *args = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_gem_object *gobj;
+ struct amdgpu_hmm_range *range;
struct amdgpu_bo *bo;
uint32_t handle;
int r;
@@ -344,7 +556,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
/* create a gem object to contain this object in */
r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
- 0, ttm_bo_type_device, NULL, &gobj);
+ 0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
if (r)
return r;
@@ -355,21 +567,25 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (r)
goto release_object;
- if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) {
- r = amdgpu_mn_register(bo, args->addr);
- if (r)
- goto release_object;
- }
+ r = amdgpu_hmm_register(bo, args->addr);
+ if (r)
+ goto release_object;
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
- r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
- if (r)
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range))
+ return -ENOMEM;
+ r = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (r) {
+ amdgpu_hmm_range_free(range);
goto release_object;
-
+ }
r = amdgpu_bo_reserve(bo, true);
if (r)
goto user_pages_done;
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
@@ -385,8 +601,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
user_pages_done:
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
-
+ amdgpu_hmm_range_free(range);
release_object:
drm_gem_object_put(gobj);
@@ -401,9 +616,9 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct amdgpu_bo *robj;
gobj = drm_gem_object_lookup(filp, handle);
- if (gobj == NULL) {
+ if (!gobj)
return -ENOENT;
- }
+
robj = gem_to_amdgpu_bo(gobj);
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
(robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
@@ -420,6 +635,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
{
union drm_amdgpu_gem_mmap *args = data;
uint32_t handle = args->in.handle;
+
memset(args, 0, sizeof(*args));
return amdgpu_mode_dumb_mmap(filp, dev, handle, &args->out.addr_ptr);
}
@@ -446,7 +662,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
/* clamp timeout to avoid unsigned-> signed overflow */
- if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
+ if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT)
return MAX_SCHEDULE_TIMEOUT - 1;
return timeout_jiffies;
@@ -464,12 +680,12 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
long ret;
gobj = drm_gem_object_lookup(filp, handle);
- if (gobj == NULL) {
+ if (!gobj)
return -ENOENT;
- }
+
robj = gem_to_amdgpu_bo(gobj);
- ret = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true,
- timeout);
+ ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
+ true, timeout);
/* ret == 0 means not signaled,
* ret > 0 means signaled
@@ -493,7 +709,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct amdgpu_bo *robj;
int r = -1;
- DRM_DEBUG("%d \n", args->handle);
+ DRM_DEBUG("%d\n", args->handle);
gobj = drm_gem_object_lookup(filp, args->handle);
if (gobj == NULL)
return -ENOENT;
@@ -538,18 +754,23 @@ out:
*
* Update the bo_va directly after setting its address. Errors are not
* vital here, so they are not reported back to userspace.
+ *
+ * Returns resulting fence if freed BO(s) got cleared from the PT.
+ * otherwise stub fence in case of error.
*/
-static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_va *bo_va,
- uint32_t operation)
+static struct dma_fence *
+amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation)
{
+ struct dma_fence *fence = dma_fence_get_stub();
int r;
if (!amdgpu_vm_ready(vm))
- return;
+ return fence;
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
if (r)
goto error;
@@ -565,34 +786,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
-}
-/**
- * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
- *
- * @adev: amdgpu_device pointer
- * @flags: GEM UAPI flags
- *
- * Returns the GEM UAPI flags mapped into hardware for the ASIC.
- */
-uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- if (adev->gmc.gmc_funcs->map_mtype)
- pte_flag |= amdgpu_gmc_map_mtype(adev,
- flags & AMDGPU_VM_MTYPE_MASK);
-
- return pte_flag;
+ return fence;
}
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
@@ -600,7 +795,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
{
const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
- AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK;
+ AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
+ AMDGPU_VM_PAGE_NOALLOC;
const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_PRT;
@@ -610,25 +806,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo *abo;
struct amdgpu_bo_va *bo_va;
- struct amdgpu_bo_list_entry vm_pd;
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
- struct list_head list, duplicates;
- uint64_t va_flags;
+ struct drm_syncobj *timeline_syncobj = NULL;
+ struct dma_fence_chain *timeline_chain = NULL;
+ struct dma_fence *fence;
+ struct drm_exec exec;
uint64_t vm_size;
int r = 0;
- if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
+ if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
dev_dbg(dev->dev,
- "va_address 0x%LX is in reserved area 0x%LX\n",
- args->va_address, AMDGPU_VA_RESERVED_SIZE);
+ "va_address 0x%llx is in reserved area 0x%llx\n",
+ args->va_address, AMDGPU_VA_RESERVED_BOTTOM);
return -EINVAL;
}
if (args->va_address >= AMDGPU_GMC_HOLE_START &&
args->va_address < AMDGPU_GMC_HOLE_END) {
dev_dbg(dev->dev,
- "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
+ "va_address 0x%llx is in VA hole 0x%llx-0x%llx\n",
args->va_address, AMDGPU_GMC_HOLE_START,
AMDGPU_GMC_HOLE_END);
return -EINVAL;
@@ -637,7 +832,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->va_address &= AMDGPU_GMC_HOLE_MASK;
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
- vm_size -= AMDGPU_VA_RESERVED_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_TOP;
if (args->va_address + args->map_size > vm_size) {
dev_dbg(dev->dev,
"va_address 0x%llx is in top reserved area 0x%llx\n",
@@ -663,36 +858,44 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- INIT_LIST_HEAD(&list);
- INIT_LIST_HEAD(&duplicates);
if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
!(args->flags & AMDGPU_VM_PAGE_PRT)) {
gobj = drm_gem_object_lookup(filp, args->handle);
if (gobj == NULL)
return -ENOENT;
abo = gem_to_amdgpu_bo(gobj);
- tv.bo = &abo->tbo;
- if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- tv.num_shared = 1;
- else
- tv.num_shared = 0;
- list_add(&tv.head, &list);
} else {
gobj = NULL;
abo = NULL;
}
- amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
-
- r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+ r = amdgpu_gem_add_input_fence(filp,
+ args->input_fence_syncobj_handles,
+ args->num_syncobj_handles);
if (r)
- goto error_unref;
+ goto error_put_gobj;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ if (gobj) {
+ r = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
if (abo) {
bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
if (!bo_va) {
r = -ENOENT;
- goto error_backoff;
+ goto error;
}
} else if (args->operation != AMDGPU_VA_OP_CLEAR) {
bo_va = fpriv->prt_va;
@@ -700,12 +903,19 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = NULL;
}
+ r = amdgpu_gem_update_timeline_node(filp,
+ args->vm_timeline_syncobj_out,
+ args->vm_timeline_point,
+ &timeline_syncobj,
+ &timeline_chain);
+ if (r)
+ goto error;
+
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
- va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
- va_flags);
+ args->flags);
break;
case AMDGPU_VA_OP_UNMAP:
r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
@@ -717,22 +927,31 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
- va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
- va_flags);
+ args->flags);
break;
default:
break;
}
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
- amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
- args->operation);
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
+ fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+ args->operation);
+
+ if (timeline_syncobj)
+ amdgpu_gem_update_bo_mapping(filp, bo_va,
+ args->operation,
+ args->vm_timeline_point,
+ fence, timeline_syncobj,
+ timeline_chain);
+ else
+ dma_fence_put(fence);
-error_backoff:
- ttm_eu_backoff_reservation(&ticket, &list);
+ }
-error_unref:
+error:
+ drm_exec_fini(&exec);
+error_put_gobj:
drm_gem_object_put(gobj);
return r;
}
@@ -740,22 +959,38 @@ error_unref:
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_op *args = data;
struct drm_gem_object *gobj;
struct amdgpu_vm_bo_base *base;
struct amdgpu_bo *robj;
+ struct drm_exec exec;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
int r;
+ if (args->padding)
+ return -EINVAL;
+
gobj = drm_gem_object_lookup(filp, args->handle);
- if (gobj == NULL) {
+ if (!gobj)
return -ENOENT;
- }
+
robj = gem_to_amdgpu_bo(gobj);
- r = amdgpu_bo_reserve(robj, false);
- if (unlikely(r))
- goto out;
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto out_exec;
+
+ if (args->op == AMDGPU_GEM_OP_GET_MAPPING_INFO) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto out_exec;
+ }
+ }
switch (args->op) {
case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: {
@@ -763,31 +998,29 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
void __user *out = u64_to_user_ptr(args->value);
info.bo_size = robj->tbo.base.size;
- info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
+ info.alignment = robj->tbo.page_alignment << PAGE_SHIFT;
info.domains = robj->preferred_domains;
info.domain_flags = robj->flags;
- amdgpu_bo_unreserve(robj);
+ drm_exec_fini(&exec);
if (copy_to_user(out, &info, sizeof(info)))
r = -EFAULT;
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
- if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
+ if (drm_gem_is_imported(&robj->tbo.base) &&
+ args->value & AMDGPU_GEM_DOMAIN_VRAM) {
r = -EINVAL;
- amdgpu_bo_unreserve(robj);
- break;
+ goto out_exec;
}
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
r = -EPERM;
- amdgpu_bo_unreserve(robj);
- break;
+ goto out_exec;
}
for (base = robj->vm_bo; base; base = base->next)
if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev),
- amdgpu_ttm_adev(base->vm->root.base.bo->tbo.bdev))) {
+ amdgpu_ttm_adev(base->vm->root.bo->tbo.bdev))) {
r = -EINVAL;
- amdgpu_bo_unreserve(robj);
- goto out;
+ goto out_exec;
}
@@ -799,29 +1032,186 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- amdgpu_vm_bo_invalidate(adev, robj, true);
+ amdgpu_vm_bo_invalidate(robj, true);
+ drm_exec_fini(&exec);
+ break;
+ case AMDGPU_GEM_OP_GET_MAPPING_INFO: {
+ struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_find(&fpriv->vm, robj);
+ struct drm_amdgpu_gem_vm_entry *vm_entries;
+ struct amdgpu_bo_va_mapping *mapping;
+ int num_mappings = 0;
+ /*
+ * num_entries is set as an input to the size of the user-allocated array of
+ * drm_amdgpu_gem_vm_entry stored at args->value.
+ * num_entries is sent back as output as the number of mappings the bo has.
+ * If that number is larger than the size of the array, the ioctl must
+ * be retried.
+ */
+ vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL);
+ if (!vm_entries)
+ return -ENOMEM;
+
+ amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_entries) {
+ vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].offset = mapping->offset;
+ vm_entries[num_mappings].flags = mapping->flags;
+ }
+ num_mappings += 1;
+ }
+
+ amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_entries) {
+ vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].offset = mapping->offset;
+ vm_entries[num_mappings].flags = mapping->flags;
+ }
+ num_mappings += 1;
+ }
+
+ drm_exec_fini(&exec);
- amdgpu_bo_unreserve(robj);
+ if (num_mappings > 0 && num_mappings <= args->num_entries)
+ if (copy_to_user(u64_to_user_ptr(args->value), vm_entries, num_mappings * sizeof(*vm_entries)))
+ r = -EFAULT;
+
+ args->num_entries = num_mappings;
+
+ kvfree(vm_entries);
break;
+ }
default:
- amdgpu_bo_unreserve(robj);
+ drm_exec_fini(&exec);
r = -EINVAL;
}
-out:
+ drm_gem_object_put(gobj);
+ return r;
+out_exec:
+ drm_exec_fini(&exec);
drm_gem_object_put(gobj);
return r;
}
+/**
+ * amdgpu_gem_list_handles_ioctl - get information about a process' buffer objects
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_gem_list_handles
+ * @filp: drm file pointer
+ *
+ * num_entries is set as an input to the size of the entries array.
+ * num_entries is sent back as output as the number of bos in the process.
+ * If that number is larger than the size of the array, the ioctl must
+ * be retried.
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_gem_list_handles *args = data;
+ struct drm_amdgpu_gem_list_handles_entry *bo_entries;
+ struct drm_gem_object *gobj;
+ int id, ret = 0;
+ int bo_index = 0;
+ int num_bos = 0;
+
+ spin_lock(&filp->table_lock);
+ idr_for_each_entry(&filp->object_idr, gobj, id)
+ num_bos += 1;
+ spin_unlock(&filp->table_lock);
+
+ if (args->num_entries < num_bos) {
+ args->num_entries = num_bos;
+ return 0;
+ }
+
+ if (num_bos == 0) {
+ args->num_entries = 0;
+ return 0;
+ }
+
+ bo_entries = kvcalloc(num_bos, sizeof(*bo_entries), GFP_KERNEL);
+ if (!bo_entries)
+ return -ENOMEM;
+
+ spin_lock(&filp->table_lock);
+ idr_for_each_entry(&filp->object_idr, gobj, id) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct drm_amdgpu_gem_list_handles_entry *bo_entry;
+
+ if (bo_index >= num_bos) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ bo_entry = &bo_entries[bo_index];
+
+ bo_entry->size = amdgpu_bo_size(bo);
+ bo_entry->alloc_flags = bo->flags & AMDGPU_GEM_CREATE_SETTABLE_MASK;
+ bo_entry->preferred_domains = bo->preferred_domains;
+ bo_entry->gem_handle = id;
+ bo_entry->alignment = bo->tbo.page_alignment;
+
+ if (bo->tbo.base.import_attach)
+ bo_entry->flags |= AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT;
+
+ bo_index += 1;
+ }
+ spin_unlock(&filp->table_lock);
+
+ args->num_entries = bo_index;
+
+ if (!ret)
+ if (copy_to_user(u64_to_user_ptr(args->entries), bo_entries, num_bos * sizeof(*bo_entries)))
+ ret = -EFAULT;
+
+ kvfree(bo_entries);
+
+ return ret;
+}
+
+static int amdgpu_gem_align_pitch(struct amdgpu_device *adev,
+ int width,
+ int cpp,
+ bool tiled)
+{
+ int aligned = width;
+ int pitch_mask = 0;
+
+ switch (cpp) {
+ case 1:
+ pitch_mask = 255;
+ break;
+ case 2:
+ pitch_mask = 127;
+ break;
+ case 3:
+ case 4:
+ pitch_mask = 63;
+ break;
+ }
+
+ aligned += pitch_mask;
+ aligned &= ~pitch_mask;
+ return aligned * cpp;
+}
+
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct drm_gem_object *gobj;
uint32_t handle;
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
u32 domain;
int r;
@@ -833,32 +1223,32 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
if (adev->mman.buffer_funcs_enabled)
flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
- args->pitch = amdgpu_align_pitch(adev, args->width,
- DIV_ROUND_UP(args->bpp, 8), 0);
+ args->pitch = amdgpu_gem_align_pitch(adev, args->width,
+ DIV_ROUND_UP(args->bpp, 8), 0);
args->size = (u64)args->pitch * args->height;
args->size = ALIGN(args->size, PAGE_SIZE);
- domain = amdgpu_bo_get_preferred_pin_domain(adev,
+ domain = amdgpu_bo_get_preferred_domain(adev,
amdgpu_display_supported_domains(adev, flags));
r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
- ttm_bo_type_device, NULL, &gobj);
+ ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
if (r)
return -ENOMEM;
r = drm_gem_handle_create(file_priv, gobj, &handle);
/* drop reference from allocate - handle holds it now */
drm_gem_object_put(gobj);
- if (r) {
+ if (r)
return r;
- }
+
args->handle = handle;
return 0;
}
#if defined(CONFIG_DEBUG_FS)
-static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
+static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
{
- struct drm_info_node *node = (struct drm_info_node *)m->private;
- struct drm_device *dev = node->minor->dev;
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
struct drm_file *file;
int r;
@@ -869,6 +1259,7 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
list_for_each_entry(file, &dev->filelist, lhead) {
struct task_struct *task;
struct drm_gem_object *gobj;
+ struct pid *pid;
int id;
/*
@@ -878,8 +1269,9 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
- task = pid_task(file->pid, PIDTYPE_PID);
- seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
task ? task->comm : "<unknown>");
rcu_read_unlock();
@@ -896,16 +1288,17 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
return 0;
}
-static const struct drm_info_list amdgpu_debugfs_gem_list[] = {
- {"amdgpu_gem_info", &amdgpu_debugfs_gem_info, 0, NULL},
-};
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_gem_info);
+
#endif
-int amdgpu_debugfs_gem_init(struct amdgpu_device *adev)
+void amdgpu_debugfs_gem_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
- return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list,
- ARRAY_SIZE(amdgpu_debugfs_gem_list));
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_gem_info", 0444, root, adev,
+ &amdgpu_debugfs_gem_info_fops);
#endif
- return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index 637bf51dbf06..b558336bc4c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -33,6 +33,8 @@
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)
+extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
/*
@@ -43,8 +45,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct dma_resv *resv,
- struct drm_gem_object **obj);
-
+ struct drm_gem_object **obj, int8_t xcp_id_plus1);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
@@ -62,13 +63,28 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
-uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+#define AMDGPU_GEM_CREATE_SETTABLE_MASK (AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | \
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS | \
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC | \
+ AMDGPU_GEM_CREATE_VRAM_CLEARED | \
+ AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | \
+ AMDGPU_GEM_CREATE_EXPLICIT_SYNC | \
+ AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE | \
+ AMDGPU_GEM_CREATE_ENCRYPTED | \
+ AMDGPU_GEM_CREATE_GFX12_DCC | \
+ AMDGPU_GEM_CREATE_DISCARDABLE | \
+ AMDGPU_GEM_CREATE_COHERENT | \
+ AMDGPU_GEM_CREATE_UNCACHED | \
+ AMDGPU_GEM_CREATE_EXT_COHERENT)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8e0a6c62322e..8b118c53f351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -23,14 +23,24 @@
*
*/
+#include <linux/firmware.h>
+#include <linux/pm_runtime.h>
+
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_mes.h"
+#include "nvd.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
+#define GFX_OFF_NO_DELAY 0
+
/*
* GPU GFX IP block helpers function.
*/
@@ -60,35 +70,26 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
}
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
- int mec, int pipe, int queue)
+ int xcc_id, int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
- adev->gfx.mec.queue_bitmap);
+ adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
}
-int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
- int me, int pipe, int queue)
+static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
{
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
int bit = 0;
bit += me * adev->gfx.me.num_pipe_per_me
- * adev->gfx.me.num_queue_per_pipe;
- bit += pipe * adev->gfx.me.num_queue_per_pipe;
+ * num_queue_per_pipe;
+ bit += pipe * num_queue_per_pipe;
bit += queue;
return bit;
}
-void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
- int *me, int *pipe, int *queue)
-{
- *queue = bit % adev->gfx.me.num_queue_per_pipe;
- *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
- % adev->gfx.me.num_pipe_per_me;
- *me = (bit / adev->gfx.me.num_queue_per_pipe)
- / adev->gfx.me.num_pipe_per_me;
-}
-
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
int me, int pipe, int queue)
{
@@ -97,42 +98,6 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
}
/**
- * amdgpu_gfx_scratch_get - Allocate a scratch register
- *
- * @adev: amdgpu_device pointer
- * @reg: scratch register mmio offset
- *
- * Allocate a CP scratch register for use by the driver (all asics).
- * Returns 0 on success or -EINVAL on failure.
- */
-int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
-{
- int i;
-
- i = ffs(adev->gfx.scratch.free_mask);
- if (i != 0 && i <= adev->gfx.scratch.num_reg) {
- i--;
- adev->gfx.scratch.free_mask &= ~(1u << i);
- *reg = adev->gfx.scratch.reg_base + i;
- return 0;
- }
- return -EINVAL;
-}
-
-/**
- * amdgpu_gfx_scratch_free - Free a scratch register
- *
- * @adev: amdgpu_device pointer
- * @reg: scratch register mmio offset
- *
- * Free a CP scratch register allocated for use by the driver (all asics)
- */
-void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
-{
- adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
-}
-
-/**
* amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
*
* @mask: array in which the per-shader array disable masks will be stored
@@ -142,9 +107,9 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
* The bitmask of CUs to be disabled in the shader array determined by se and
* sh is stored in mask[se * max_sh + sh].
*/
-void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
+void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
{
- unsigned se, sh, cu;
+ unsigned int se, sh, cu;
const char *p;
memset(mask, 0, sizeof(*mask) * max_se * max_sh);
@@ -156,6 +121,7 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
for (;;) {
char *next;
int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+
if (ret < 3) {
DRM_ERROR("amdgpu: could not parse disable_cu\n");
return;
@@ -176,14 +142,22 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
}
}
-static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
+static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
+{
+ return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
+}
+
+static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
if (amdgpu_compute_multipipe != -1) {
- DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
+ dev_info(adev->dev, "amdgpu: forcing compute pipe policy %d\n",
amdgpu_compute_multipipe);
return amdgpu_compute_multipipe == 1;
}
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
+ return true;
+
/* FIXME: spreading the queues across pipes causes perf regressions
* on POLARIS11 compute workloads */
if (adev->asic_type == CHIP_POLARIS11)
@@ -192,6 +166,28 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
return adev->gfx.mec.num_mec > 1;
}
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ int queue = ring->queue;
+ int pipe = ring->pipe;
+
+ /* Policy: use pipe1 queue0 as high priority graphics queue if we
+ * have more than one gfx pipe.
+ */
+ if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
+ adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
+ int me = ring->me;
+ int bit;
+
+ bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
+ if (ring == &adev->gfx.gfx_ring[bit])
+ return true;
+ }
+
+ return false;
+}
+
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
@@ -207,55 +203,71 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, pipe;
- bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+ int i, j, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe,
adev->gfx.num_compute_rings);
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
if (multipipe_policy) {
- /* policy: make queues evenly cross all pipes on MEC1 only */
- for (i = 0; i < max_queues_per_mec; i++) {
- pipe = i % adev->gfx.mec.num_pipe_per_mec;
- queue = (i / adev->gfx.mec.num_pipe_per_mec) %
- adev->gfx.mec.num_queue_per_pipe;
-
- set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
- adev->gfx.mec.queue_bitmap);
+ /* policy: make queues evenly cross all pipes on MEC1 only
+ * for multiple xcc, just use the original policy for simplicity */
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; i++) {
+ pipe = i % adev->gfx.mec.num_pipe_per_mec;
+ queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+ adev->gfx.mec.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+ adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
}
} else {
/* policy: amdgpu owns all queues in the given pipe */
- for (i = 0; i < max_queues_per_mec; ++i)
- set_bit(i, adev->gfx.mec.queue_bitmap);
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; ++i)
+ set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
}
- dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+ for (j = 0; j < num_xcc; j++) {
+ dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
+ bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+ }
}
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, me;
-
- for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
- queue = i % adev->gfx.me.num_queue_per_pipe;
- me = (i / adev->gfx.me.num_queue_per_pipe)
- / adev->gfx.me.num_pipe_per_me;
+ int i, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+ int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe;
- if (me >= adev->gfx.me.num_me)
- break;
+ if (multipipe_policy) {
/* policy: amdgpu owns the first queue per pipe at this stage
* will extend to mulitple queues per pipe later */
- if (me == 0 && queue < 1)
+ for (i = 0; i < max_queues_per_me; i++) {
+ pipe = i % adev->gfx.me.num_pipe_per_me;
+ queue = (i / adev->gfx.me.num_pipe_per_me) %
+ num_queue_per_pipe;
+
+ set_bit(pipe * num_queue_per_pipe + queue,
+ adev->gfx.me.queue_bitmap);
+ }
+ } else {
+ for (i = 0; i < max_queues_per_me; ++i)
set_bit(i, adev->gfx.me.queue_bitmap);
}
/* update the number of active graphics rings */
- adev->gfx.num_gfx_rings =
- bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+ if (adev->gfx.num_gfx_rings)
+ adev->gfx.num_gfx_rings =
+ bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
+ struct amdgpu_ring *ring, int xcc_id)
{
int queue_bit;
int mec, pipe, queue;
@@ -264,8 +276,8 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
- while (queue_bit-- >= 0) {
- if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+ while (--queue_bit >= 0) {
+ if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
continue;
amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
@@ -289,11 +301,11 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
return -EINVAL;
}
-int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq)
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_irq_src *irq = &kiq->irq;
+ struct amdgpu_ring *ring = &kiq->ring;
int r = 0;
spin_lock_init(&kiq->ring_lock);
@@ -301,18 +313,24 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = adev->doorbell_index.kiq;
-
- r = amdgpu_gfx_kiq_acquire(adev, ring);
+ ring->xcc_id = xcc_id;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ ring->doorbell_index =
+ (adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range)
+ << 1;
+
+ r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
if (r)
return r;
ring->eop_gpu_addr = kiq->eop_gpu_addr;
ring->no_scheduler = true;
- sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
- r = amdgpu_ring_init(adev, ring, 1024,
- irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
- AMDGPU_RING_PRIO_DEFAULT);
+ snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
+ (unsigned char)xcc_id, (unsigned char)ring->me,
+ (unsigned char)ring->pipe, (unsigned char)ring->queue);
+ r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
@@ -324,19 +342,19 @@ void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
amdgpu_ring_fini(ring);
}
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
- unsigned hpd_size)
+ unsigned int hpd_size, int xcc_id)
{
int r;
u32 *hpd;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
@@ -359,31 +377,44 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size)
+ unsigned int mqd_size, int xcc_id)
{
- struct amdgpu_ring *ring = NULL;
- int r, i;
+ int r, i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *ring = &kiq->ring;
+ u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
+ /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
+ domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
/* create MQD for KIQ */
- ring = &adev->gfx.kiq.ring;
- if (!ring->mqd_obj) {
+ if (!adev->enable_mes_kiq && !ring->mqd_obj) {
/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
* otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
* deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
* KIQ MQD no matter SRIOV or Bare-metal
*/
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
- &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
return r;
}
/* prepare MQD backup */
- adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
- dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
+ if (!kiq->mqd_backup) {
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
}
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
@@ -392,47 +423,55 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring = &adev->gfx.gfx_ring[i];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ domain, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
+ ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->gfx.me.mqd_backup[i])
+ adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.me.mqd_backup[i]) {
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
}
}
}
/* create MQD for each KCQ */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ domain, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
+ ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->gfx.mec.mqd_backup[i])
+ adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.mec.mqd_backup[j]) {
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
}
}
return 0;
}
-void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_ring *ring = NULL;
- int i;
+ int i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
@@ -445,38 +484,123 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- kfree(adev->gfx.mec.mqd_backup[i]);
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
+ kfree(adev->gfx.mec.mqd_backup[j]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
- ring = &adev->gfx.kiq.ring;
- kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
+ ring = &kiq->ring;
+ kfree(kiq->mqd_backup);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
-int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i;
+ int i, r = 0;
+ int j;
+
+ if (adev->enable_mes) {
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.compute_ring[j],
+ RESET_QUEUES, 0, 0);
+ }
+ return 0;
+ }
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
- adev->gfx.num_compute_rings))
+ adev->gfx.num_compute_rings)) {
+ spin_unlock(&kiq->ring_lock);
return -ENOMEM;
+ }
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.compute_ring[j],
RESET_QUEUES, 0, 0);
+ }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+
+ spin_unlock(&kiq->ring_lock);
+
+ return r;
+}
+
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i, r = 0;
+ int j;
+
+ if (adev->enable_mes) {
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ }
+ return 0;
+ }
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ spin_lock(&kiq->ring_lock);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_gfx_rings)) {
+ spin_unlock(&kiq->ring_lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+
+ /*
+ * Ring test will do a basic scratch register change check.
+ * Just run this to ensure that unmap queues that is submitted
+ * before got processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ }
- return amdgpu_ring_test_helper(kiq_ring);
+ return r;
}
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
@@ -492,90 +616,294 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
return set_resource_bit;
}
-int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
+static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ uint64_t queue_mask = ~0ULL;
+ int r, i, j;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ if (!adev->enable_uni_mes) {
+ spin_lock(&kiq->ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
+ if (r) {
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ dev_err(adev->dev, "KIQ failed to set resources\n");
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ r = amdgpu_mes_map_legacy_queue(adev,
+ &adev->gfx.compute_ring[j]);
+ if (r) {
+ dev_err(adev->dev, "failed to map compute queue\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
uint64_t queue_mask = 0;
- int r, i;
+ int r, i, j;
+
+ if (adev->mes.enable_legacy_queue_map)
+ return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
return -EINVAL;
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
continue;
/* This situation may be hit in the future if a new HW
* generation exposes more than 64 queues. If so, the
* definition of queue_mask needs updating */
if (WARN_ON(i > (sizeof(queue_mask)*8))) {
- DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+ dev_err(adev->dev, "Invalid KCQ enabled: %d\n", i);
break;
}
queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
}
- DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
- kiq_ring->queue);
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ dev_info(adev->dev, "kiq ring mec %d pipe %d q %d\n", kiq_ring->me,
+ kiq_ring->pipe, kiq_ring->queue);
+ spin_lock(&kiq->ring_lock);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_compute_rings +
kiq->pmf->set_resources_size);
if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
return r;
}
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.compute_ring[j]);
+ }
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that map queues that is submitted before got
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ dev_err(adev->dev, "KCQ enable failed\n");
+
+ return r;
+}
+
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int r, i, j;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+ if (adev->mes.enable_legacy_queue_map) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ r = amdgpu_mes_map_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j]);
+ if (r) {
+ dev_err(adev->dev, "failed to map gfx queue\n");
+ return r;
+ }
+ }
+
+ return 0;
+ }
+
+ spin_lock(&kiq->ring_lock);
+ /* No need to map kcq on the slave */
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_gfx_rings);
+ if (r) {
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j]);
+ }
+ }
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that map queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
if (r)
- DRM_ERROR("KCQ enable failed\n");
+ dev_err(adev->dev, "KGQ enable failed\n");
return r;
}
+static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
+ bool no_delay)
+{
+ unsigned long delay = GFX_OFF_DELAY_ENABLE;
+
+ if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
+ return;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+ goto unlock;
+
+ adev->gfx.gfx_off_req_count--;
+
+ if (adev->gfx.gfx_off_req_count == 0 &&
+ !adev->gfx.gfx_off_state) {
+ /* If going to s2idle, no need to wait */
+ if (no_delay) {
+ if (!amdgpu_dpm_set_powergating_by_smu(adev,
+ AMD_IP_BLOCK_TYPE_GFX, true, 0))
+ adev->gfx.gfx_off_state = true;
+ } else {
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ delay);
+ }
+ }
+ } else {
+ if (adev->gfx.gfx_off_req_count == 0) {
+ cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+ if (adev->gfx.gfx_off_state &&
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
+ adev->gfx.gfx_off_state = false;
+
+ if (adev->gfx.funcs->init_spm_golden) {
+ dev_dbg(adev->dev,
+ "GFXOFF is disabled, re-init SPM golden settings\n");
+ amdgpu_gfx_init_spm_golden(adev);
+ }
+ }
+ }
+
+ adev->gfx.gfx_off_req_count++;
+ }
+
+unlock:
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+}
+
/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
*
* @adev: amdgpu_device pointer
* @bool enable true: enable gfx off feature, false: disable gfx off feature
*
- * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled.
* 2. other client can send request to disable gfx off feature, the request should be honored.
* 3. other client can cancel their request of disable gfx off feature
* 4. other client should not send request to enable gfx off feature before disable gfx off feature.
+ *
+ * gfx off allow will be delayed by GFX_OFF_DELAY_ENABLE ms.
*/
-
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
- if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
- return;
+ /* If going to s2idle, no need to wait */
+ bool no_delay = adev->in_s0ix ? true : false;
+
+ amdgpu_gfx_do_off_ctrl(adev, enable, no_delay);
+}
+
+/* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @bool enable true: enable gfx off feature, false: disable gfx off feature
+ *
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled.
+ * 2. other client can send request to disable gfx off feature, the request should be honored.
+ * 3. other client can cancel their request of disable gfx off feature
+ * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
+ *
+ * gfx off allow will be issued immediately.
+ */
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_do_off_ctrl(adev, enable, true);
+}
+
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
+{
+ int r = 0;
mutex_lock(&adev->gfx.gfx_off_mutex);
- if (!enable)
- adev->gfx.gfx_off_req_count++;
- else if (adev->gfx.gfx_off_req_count > 0)
- adev->gfx.gfx_off_req_count--;
+ r = amdgpu_dpm_set_residency_gfxoff(adev, value);
- if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
- } else if (!enable && adev->gfx.gfx_off_state) {
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
- adev->gfx.gfx_off_state = false;
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
- if (adev->gfx.funcs->init_spm_golden) {
- dev_dbg(adev->dev, "GFXOFF is disabled, re-init SPM golden settings\n");
- amdgpu_gfx_init_spm_golden(adev);
- }
- }
- }
+ return r;
+}
+
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_residency_gfxoff(adev, value);
mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
}
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
@@ -585,71 +913,88 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
mutex_lock(&adev->gfx.gfx_off_mutex);
- r = smu_get_status_gfxoff(adev, value);
+ r = amdgpu_dpm_get_status_gfxoff(adev, value);
mutex_unlock(&adev->gfx.gfx_off_mutex);
return r;
}
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
- struct ras_fs_if fs_info = {
- .sysfs_name = "gfx_err_count",
- };
- struct ras_ih_if ih_info = {
- .cb = amdgpu_gfx_process_ras_data_cb,
- };
- if (!adev->gfx.ras_if) {
- adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->gfx.ras_if)
- return -ENOMEM;
- adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
- adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->gfx.ras_if->sub_block_index = 0;
- strcpy(adev->gfx.ras_if->name, "gfx");
- }
- fs_info.head = ih_info.head = *adev->gfx.ras_if;
-
- r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
- &fs_info, &ih_info);
- if (r)
- goto free;
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+ if (r)
+ return r;
+ }
- if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
- r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
- goto late_fini;
+ return r;
+
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
+ if (adev->gfx.cp_ecc_error_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto late_fini;
+ }
} else {
- /* free gfx ras_if if ras is not supported */
- r = 0;
- goto free;
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
}
return 0;
late_fini:
- amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
-free:
- kfree(adev->gfx.ras_if);
- adev->gfx.ras_if = NULL;
+ amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
+int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
- adev->gfx.ras_if) {
- struct ras_common_if *ras_if = adev->gfx.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- .cb = amdgpu_gfx_process_ras_data_cb,
- };
+ int err = 0;
+ struct amdgpu_gfx_ras *ras = NULL;
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
+ /* adev->gfx.ras is NULL, which means gfx does not
+ * support ras function, then do nothing here.
+ */
+ if (!adev->gfx.ras)
+ return 0;
+
+ ras = adev->gfx.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register gfx ras block!\n");
+ return err;
}
+
+ strcpy(ras->ras_block.ras_comm.name, "gfx");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gfx.ras_if = &ras->ras_block.ras_comm;
+
+ /* If not define special ras_late_init function, use gfx default ras_late_init */
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
+
+ /* If not defined special ras_cb function, use default ras_cb */
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
+
+ return 0;
+}
+
+int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
+ return adev->gfx.ras->poison_consumption_handler(adev, entry);
+
+ return 0;
}
int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
@@ -664,8 +1009,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
*/
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->gfx.funcs->query_ras_error_count)
- adev->gfx.funcs->query_ras_error_count(adev, err_data);
+ if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
+ adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev);
}
return AMDGPU_RAS_SUCCESS;
@@ -685,22 +1031,44 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
ih_data.head = *ras_if;
- DRM_ERROR("CP ECC ERROR IRQ\n");
+ dev_err(adev->dev, "CP ECC ERROR IRQ\n");
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
-uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id))
+{
+ int i;
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ if (err_data) {
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+ }
+
+ for_each_inst(i, xcc_mask)
+ func(adev, ras_error_status, i);
+}
+
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq, reg_val_offs = 0, value = 0;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *ring = &kiq->ring;
- if (adev->in_pci_err_recovery)
+ if (amdgpu_device_skip_hw_access(adev))
return 0;
+ if (adev->mes.ring[0].sched.ready)
+ return amdgpu_mes_rreg(adev, reg);
+
BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
@@ -708,7 +1076,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
pr_err("critical bug! too many kiq readers\n");
goto failed_unlock;
}
- amdgpu_ring_alloc(ring, 32);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r)
@@ -732,6 +1103,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_read;
+
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
@@ -755,21 +1129,29 @@ failed_kiq_read:
return ~0;
}
-void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_wreg);
- if (adev->in_pci_err_recovery)
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (adev->mes.ring[0].sched.ready) {
+ amdgpu_mes_wreg(adev, reg, v);
return;
+ }
spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
amdgpu_ring_emit_wreg(ring, reg, v);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r)
@@ -793,6 +1175,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_write;
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -805,11 +1189,81 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
failed_undo:
amdgpu_ring_undo(ring);
+failed_unlock:
spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready)
+ return amdgpu_mes_hdp_flush(adev);
+
+ if (!ring->funcs->emit_hdp_flush) {
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
+ amdgpu_ring_emit_hdp_flush(ring);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_hdp_flush;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_hdp_flush;
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_hdp_flush:
+ dev_err(adev->dev, "failed to flush HDP via KIQ\n");
+ return r < 0 ? r : -EIO;
+}
+
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
if (amdgpu_num_kcq == -1) {
@@ -821,22 +1275,1283 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
return amdgpu_num_kcq;
}
-/* amdgpu_gfx_state_change_set - Handle gfx power state change set
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
+ uint32_t ucode_id)
+{
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct firmware *ucode_fw;
+ unsigned int fw_size;
+
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_CP_PFP:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.ce_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ default:
+ dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
+ return;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[ucode_id];
+ info->ucode_id = ucode_id;
+ info->fw = ucode_fw;
+ adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
+ }
+}
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
+{
+ return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
+ adev->gfx.num_xcc_per_xcp : 1));
+}
+
+static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int mode;
+
+ /* Only minimal precaution taken to reject requests while in reset.*/
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+
+ return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
+}
+
+static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_gfx_partition mode;
+ int ret = 0, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (num_xcc % 2 != 0)
+ return -EINVAL;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX"))) {
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
+ /*
+ * DPX mode needs AIDs to be in multiple of 2.
+ * Each AID connects 2 XCCs.
+ */
+ if (num_xcc%4)
+ return -EINVAL;
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
+ if (num_xcc != 6)
+ return -EINVAL;
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
+ if (num_xcc != 8)
+ return -EINVAL;
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ } else {
+ return -EINVAL;
+ }
+
+ /* Don't allow a switch while under reset */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EPERM;
+
+ ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+
+ up_read(&adev->reset_domain->sem);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
+static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_mgr || !xcp_mgr->avail_xcp_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ struct drm_sched_entity entity;
+ static atomic_t counter;
+ struct dma_fence *f;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ void *owner;
+ int i, r;
+
+ /* Initialize the scheduler entity */
+ r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
+ goto err;
+ }
+
+ /*
+ * Use some unique dummy value as the owner to make sure we execute
+ * the cleaner shader on each submission. The value just need to change
+ * for each submission and is otherwise meaningless.
+ */
+ owner = (void *)(unsigned long)atomic_inc_return(&counter);
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
+ 64, 0, &job,
+ AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER);
+ if (r)
+ goto err;
+
+ job->enforce_isolation = true;
+ /* always run the cleaner shader */
+ job->run_cleaner_shader = true;
+
+ ib = &job->ibs[0];
+ for (i = 0; i <= ring->funcs->align_mask; ++i)
+ ib->ptr[i] = ring->funcs->nop;
+ ib->length_dw = ring->funcs->align_mask + 1;
+
+ f = amdgpu_job_submit(job);
+
+ r = dma_fence_wait(f, false);
+ if (r)
+ goto err;
+
+ dma_fence_put(f);
+
+ /* Clean up the scheduler entity */
+ drm_sched_entity_destroy(&entity);
+ return 0;
+
+err:
+ return r;
+}
+
+static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
+{
+ int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ struct amdgpu_ring *ring;
+ int num_xcc_to_clear;
+ int i, r, xcc_id;
+
+ if (adev->gfx.num_xcc_per_xcp)
+ num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
+ else
+ num_xcc_to_clear = 1;
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+ if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
+ r = amdgpu_gfx_run_cleaner_shader_job(ring);
+ if (r)
+ return r;
+ num_xcc_to_clear--;
+ break;
+ }
+ }
+ }
+
+ if (num_xcc_to_clear)
+ return -ENOENT;
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * Provides the sysfs interface to manually run a cleaner shader, which is
+ * used to clear the GPU state between different tasks. Writing a value to the
+ * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
+ * The value written corresponds to the partition index on multi-partition
+ * devices. On single-partition devices, the value should be '0'.
+ *
+ * The cleaner shader clears the Local Data Store (LDS) and General Purpose
+ * Registers (GPRs) to ensure data isolation between GPU workloads.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
+static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int ret;
+ long value;
+
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
+
+ if (adev->gfx.disable_kq)
+ return -EPERM;
+
+ ret = kstrtol(buf, 0, &value);
+
+ if (ret)
+ return -EINVAL;
+
+ if (value < 0)
+ return -EINVAL;
+
+ if (adev->xcp_mgr) {
+ if (value >= adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+ } else {
+ if (value > 1)
+ return -EINVAL;
+ }
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
+ return ret;
+ }
+
+ ret = amdgpu_gfx_run_cleaner_shader(adev, value);
+
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer to store the output data
+ *
+ * Provides the sysfs read interface to get the current settings of the 'enforce_isolation'
+ * feature for each GPU partition. Reading from the 'enforce_isolation'
+ * sysfs file returns the isolation settings for all partitions, where '0'
+ * indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode,
+ * and '3' indicates enabled without cleaner shader.
+ *
+ * Return: The number of bytes read from the sysfs file.
+ */
+static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int i;
+ ssize_t size = 0;
+
+ if (adev->xcp_mgr) {
+ for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
+ size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
+ if (i < (adev->xcp_mgr->num_xcps - 1))
+ size += sysfs_emit_at(buf, size, " ");
+ }
+ buf[size++] = '\n';
+ } else {
+ size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
+ }
+
+ return size;
+}
+
+/**
+ * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * This function allows control over the 'enforce_isolation' feature, which
+ * serializes access to the graphics engine. Writing '0' to disable, '1' to
+ * enable isolation with cleaner shader, '2' to enable legacy isolation without
+ * cleaner shader, or '3' to enable process isolation without submitting the
+ * cleaner shader to the 'enforce_isolation' sysfs file sets the isolation mode
+ * for each partition. The input should specify the setting for all
+ * partitions.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
+static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ long partition_values[MAX_XCP] = {0};
+ int ret, i, num_partitions;
+ const char *input_buf = buf;
+
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ ret = sscanf(input_buf, "%ld", &partition_values[i]);
+ if (ret <= 0)
+ break;
+
+ /* Move the pointer to the next value in the string */
+ input_buf = strchr(input_buf, ' ');
+ if (input_buf) {
+ input_buf++;
+ } else {
+ i++;
+ break;
+ }
+ }
+ num_partitions = i;
+
+ if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+
+ if (!adev->xcp_mgr && num_partitions != 1)
+ return -EINVAL;
+
+ for (i = 0; i < num_partitions; i++) {
+ if (partition_values[i] != 0 &&
+ partition_values[i] != 1 &&
+ partition_values[i] != 2 &&
+ partition_values[i] != 3)
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < num_partitions; i++) {
+ switch (partition_values[i]) {
+ case 0:
+ default:
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ amdgpu_mes_update_enforce_isolation(adev);
+
+ return count;
+}
+
+static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
+}
+
+static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
+}
+
+static DEVICE_ATTR(run_cleaner_shader, 0200,
+ NULL, amdgpu_gfx_set_run_cleaner_shader);
+
+static DEVICE_ATTR(enforce_isolation, 0644,
+ amdgpu_gfx_get_enforce_isolation,
+ amdgpu_gfx_set_enforce_isolation);
+
+static DEVICE_ATTR(current_compute_partition, 0644,
+ amdgpu_gfx_get_current_compute_partition,
+ amdgpu_gfx_set_compute_partition);
+
+static DEVICE_ATTR(available_compute_partition, 0444,
+ amdgpu_gfx_get_available_compute_partition, NULL);
+static DEVICE_ATTR(gfx_reset_mask, 0444,
+ amdgpu_gfx_get_gfx_reset_mask, NULL);
+
+static DEVICE_ATTR(compute_reset_mask, 0444,
+ amdgpu_gfx_get_compute_reset_mask, NULL);
+
+static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
+ int r;
+
+ if (!xcp_mgr)
+ return 0;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
+
+ if (!xcp_switch_supported)
+ dev_attr_current_compute_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+
+ r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
+ if (r)
+ return r;
+
+ if (xcp_switch_supported)
+ r = device_create_file(adev->dev,
+ &dev_attr_available_compute_partition);
+
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
+
+ if (!xcp_mgr)
+ return;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
+ device_remove_file(adev->dev, &dev_attr_current_compute_partition);
+
+ if (xcp_switch_supported)
+ device_remove_file(adev->dev,
+ &dev_attr_available_compute_partition);
+}
+
+static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
+ if (r)
+ return r;
+ if (adev->gfx.enable_cleaner_shader)
+ r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
+
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+ if (adev->gfx.enable_cleaner_shader)
+ device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
+}
+
+static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (!amdgpu_gpu_recovery)
+ return r;
+
+ if (adev->gfx.num_gfx_rings) {
+ r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
+ if (r)
+ return r;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->gfx.num_gfx_rings)
+ device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
+
+ if (adev->gfx.num_compute_rings)
+ device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
+}
+
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_gfx_sysfs_xcp_init(adev);
+ if (r) {
+ dev_err(adev->dev, "failed to create xcp sysfs files");
+ return r;
+ }
+
+ r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
+ if (r)
+ dev_err(adev->dev, "failed to create isolation sysfs files");
+
+ r = amdgpu_gfx_sysfs_reset_mask_init(adev);
+ if (r)
+ dev_err(adev->dev, "failed to create reset mask sysfs files");
+
+ return r;
+}
+
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ amdgpu_gfx_sysfs_xcp_fini(adev);
+ amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+ amdgpu_gfx_sysfs_reset_mask_fini(adev);
+ }
+}
+
+int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return -EOPNOTSUPP;
+
+ return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.cleaner_shader_obj,
+ &adev->gfx.cleaner_shader_gpu_addr,
+ (void **)&adev->gfx.cleaner_shader_cpu_ptr);
+}
+
+void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
+ &adev->gfx.cleaner_shader_gpu_addr,
+ (void **)&adev->gfx.cleaner_shader_cpu_ptr);
+}
+
+void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size,
+ const void *cleaner_shader_ptr)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
+ memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
+ cleaner_shader_size);
+}
+
+/**
+ * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
* @adev: amdgpu_device pointer
- * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2 -sGpuChangeState_D3Entry)
+ * @idx: Index of the scheduler to control
+ * @enable: Whether to enable or disable the KFD scheduler
+ *
+ * This function is used to control the KFD (Kernel Fusion Driver) scheduler
+ * from the KGD. It is part of the cleaner shader feature. This function plays
+ * a key role in enforcing process isolation on the GPU.
+ *
+ * The function uses a reference count mechanism (kfd_sch_req_count) to keep
+ * track of the number of requests to enable the KFD scheduler. When a request
+ * to enable the KFD scheduler is made, the reference count is decremented.
+ * When the reference count reaches zero, a delayed work is scheduled to
+ * enforce isolation after a delay of GFX_SLICE_PERIOD.
*
+ * When a request to disable the KFD scheduler is made, the function first
+ * checks if the reference count is zero. If it is, it cancels the delayed work
+ * for enforcing isolation and checks if the KFD scheduler is active. If the
+ * KFD scheduler is active, it sends a request to stop the KFD scheduler and
+ * sets the KFD scheduler state to inactive. Then, it increments the reference
+ * count.
+ *
+ * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
+ * scheduler state and reference count are updated atomically.
+ *
+ * Note: If the reference count is already zero when a request to enable the
+ * KFD scheduler is made, it means there's an imbalance bug somewhere. The
+ * function triggers a warning in this case.
*/
+static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
+ bool enable)
+{
+ mutex_lock(&adev->gfx.userq_sch_mutex);
+
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) {
+ dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
+ goto unlock;
+ }
-void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state)
+ adev->gfx.userq_sch_req_count[idx]--;
+
+ if (adev->gfx.userq_sch_req_count[idx] == 0 &&
+ adev->gfx.userq_sch_inactive[idx]) {
+ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
+ msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
+ }
+ } else {
+ if (adev->gfx.userq_sch_req_count[idx] == 0) {
+ cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
+ if (!adev->gfx.userq_sch_inactive[idx]) {
+ amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_stop_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = true;
+ }
+ }
+
+ adev->gfx.userq_sch_req_count[idx]++;
+ }
+
+unlock:
+ mutex_unlock(&adev->gfx.userq_sch_mutex);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
+ *
+ * @work: work_struct.
+ *
+ * This function is the work handler for enforcing shader isolation on AMD GPUs.
+ * It counts the number of emitted fences for each GFX and compute ring. If there
+ * are any fences, it schedules the `enforce_isolation_work` to be run after a
+ * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion
+ * Driver (KFD) to resume the runqueue. The function is synchronized using the
+ * `enforce_isolation_mutex`.
+ */
+void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
{
- if (is_support_sw_smu(adev)) {
- smu_gfx_state_change_set(&adev->smu, state);
+ struct amdgpu_isolation_work *isolation_work =
+ container_of(work, struct amdgpu_isolation_work, work.work);
+ struct amdgpu_device *adev = isolation_work->adev;
+ u32 i, idx, fences = 0;
+
+ if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = isolation_work->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
+ if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+ }
+ for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
+ if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+ }
+ if (fences) {
+ /* we've already had our timeslice, so let's wrap this up */
+ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
+ msecs_to_jiffies(1));
} else {
- mutex_lock(&adev->pm.mutex);
- if (adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->gfx_state_change_set)
- ((adev)->powerplay.pp_funcs->gfx_state_change_set(
- (adev)->powerplay.pp_handle, state));
- mutex_unlock(&adev->pm.mutex);
+ /* Tell KFD to resume the runqueue */
+ WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]);
+ WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]);
+
+ amdgpu_userq_start_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_start_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = false;
}
+ mutex_unlock(&adev->enforce_isolation_mutex);
}
+
+/**
+ * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the GPU partition
+ *
+ * When kernel submissions come in, the jobs are given a time slice and once
+ * that time slice is up, if there are KFD user queues active, kernel
+ * submissions are blocked until KFD has had its time slice. Once the KFD time
+ * slice is up, KFD user queues are preempted and kernel submissions are
+ * unblocked and allowed to run again.
+ */
+static void
+amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
+ u32 idx)
+{
+ unsigned long cjiffies;
+ bool wait = false;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ /* set the initial values if nothing is set */
+ if (!adev->gfx.enforce_isolation_jiffies[idx]) {
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ /* Make sure KFD gets a chance to run */
+ if (amdgpu_amdkfd_compute_active(adev, idx)) {
+ cjiffies = jiffies;
+ if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
+ cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
+ if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) {
+ /* if our time is up, let KGD work drain before scheduling more */
+ wait = true;
+ /* reset the timer period */
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ } else {
+ /* set the timer period to what's left in our time slice */
+ adev->gfx.enforce_isolation_time[idx] =
+ GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
+ }
+ } else {
+ /* if jiffies wrap around we will just wait a little longer */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ }
+ } else {
+ /* if there is no KFD work, then set the full slice period */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (wait)
+ msleep(GFX_SLICE_PERIOD_MS);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring begin_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
+void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 idx;
+ bool sched_work = false;
+
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = ring->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ /* Don't submit more work until KFD has had some time */
+ amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ if (adev->kfd.init_complete)
+ sched_work = true;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring end_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
+void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 idx;
+ bool sched_work = false;
+
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = ring->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ if (adev->kfd.init_complete)
+ sched_work = true;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
+}
+
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, gfx.idle_work.work);
+ enum PP_SMC_POWER_PROFILE profile;
+ u32 i, fences = 0;
+ int r;
+
+ if (adev->gfx.num_gfx_rings)
+ profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ else
+ profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+ for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+ if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
+ mutex_lock(&adev->gfx.workload_profile_mutex);
+ if (adev->gfx.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, profile, false);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+ profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+ "fullscreen 3D" : "compute");
+ adev->gfx.workload_profile_active = false;
+ }
+ mutex_unlock(&adev->gfx.workload_profile_mutex);
+ } else {
+ schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+ }
+}
+
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ enum PP_SMC_POWER_PROFILE profile;
+ int r;
+
+ if (amdgpu_dpm_is_overdrive_enabled(adev))
+ return;
+
+ if (adev->gfx.num_gfx_rings)
+ profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ else
+ profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+ atomic_inc(&adev->gfx.total_submission_cnt);
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ /* We can safely return early here because we've cancelled the
+ * the delayed work so there is no one else to set it to false
+ * and we don't care if someone else sets it to true.
+ */
+ if (adev->gfx.workload_profile_active)
+ return;
+
+ mutex_lock(&adev->gfx.workload_profile_mutex);
+ if (!adev->gfx.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, profile, true);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+ profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+ "fullscreen 3D" : "compute");
+ adev->gfx.workload_profile_active = true;
+ }
+ mutex_unlock(&adev->gfx.workload_profile_mutex);
+}
+
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (amdgpu_dpm_is_overdrive_enabled(adev))
+ return;
+
+ atomic_dec(&ring->adev->gfx.total_submission_cnt);
+
+ schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_start - Set CSB preamble start
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble setup.
+ *
+ * Return:
+ * return the latest index.
+ */
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer)
+{
+ u32 count = 0;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_data_parser - Parser CS data
+ *
+ * @adev: amdgpu_device pointer used to get the CS data and other gfx info.
+ * @buffer: This is an output variable that gets the PACKET3 preamble end.
+ * @count: Index to start set the preemble end.
+ *
+ * Return:
+ * return the latest index.
+ */
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count)
+{
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ u32 i;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ }
+ }
+ }
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_end - Set CSB preamble end
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble end.
+ * @count: Index to start set the preemble end.
+ */
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count)
+{
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+/*
+ * debugfs for to enable/disable gfx job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (val & (1 << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
+ amdgpu_debugfs_gfx_sched_mask_get,
+ amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->gfx.num_gfx_rings > 1))
+ return;
+ sprintf(name, "amdgpu_gfx_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_gfx_sched_mask_fops);
+#endif
+}
+
+/*
+ * debugfs for to enable/disable compute job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->gfx.num_compute_rings) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+ ring = &adev->gfx.compute_ring[i];
+ if (val & (1 << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
+ amdgpu_debugfs_compute_sched_mask_get,
+ amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->gfx.num_compute_rings > 1))
+ return;
+ sprintf(name, "amdgpu_compute_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_compute_sched_mask_fops);
+#endif
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 72dbcd2bc6a6..efd61a1ccc66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -30,6 +30,11 @@
#include "clearstate_defs.h"
#include "amdgpu_ring.h"
#include "amdgpu_rlc.h"
+#include "amdgpu_imu.h"
+#include "soc15.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_xcp.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -38,34 +43,77 @@
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
-#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
-#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+#define AMDGPU_MAX_GC_INSTANCES 8
+#define AMDGPU_MAX_QUEUES 128
-enum gfx_pipe_priority {
- AMDGPU_GFX_PIPE_PRIO_NORMAL = 1,
- AMDGPU_GFX_PIPE_PRIO_HIGH,
- AMDGPU_GFX_PIPE_PRIO_MAX
-};
+#define AMDGPU_MAX_GFX_QUEUES AMDGPU_MAX_QUEUES
+#define AMDGPU_MAX_COMPUTE_QUEUES AMDGPU_MAX_QUEUES
-/* Argument for PPSMC_MSG_GpuChangeState */
-enum gfx_change_state {
- sGpuChangeState_D0Entry = 1,
- sGpuChangeState_D3Entry,
+enum amdgpu_gfx_pipe_priority {
+ AMDGPU_GFX_PIPE_PRIO_NORMAL = AMDGPU_RING_PRIO_1,
+ AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2
};
#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
+/* 1 second timeout */
+#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+enum amdgpu_gfx_partition {
+ AMDGPU_SPX_PARTITION_MODE = 0,
+ AMDGPU_DPX_PARTITION_MODE = 1,
+ AMDGPU_TPX_PARTITION_MODE = 2,
+ AMDGPU_QPX_PARTITION_MODE = 3,
+ AMDGPU_CPX_PARTITION_MODE = 4,
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1,
+ /* Automatically choose the right mode */
+ AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
+};
+
+#define NUM_XCC(x) hweight16(x)
+
+enum amdgpu_gfx_ras_mem_id_type {
+ AMDGPU_GFX_CP_MEM = 0,
+ AMDGPU_GFX_GCEA_MEM,
+ AMDGPU_GFX_GC_CANE_MEM,
+ AMDGPU_GFX_GCUTCL2_MEM,
+ AMDGPU_GFX_GDS_MEM,
+ AMDGPU_GFX_LDS_MEM,
+ AMDGPU_GFX_RLC_MEM,
+ AMDGPU_GFX_SP_MEM,
+ AMDGPU_GFX_SPI_MEM,
+ AMDGPU_GFX_SQC_MEM,
+ AMDGPU_GFX_SQ_MEM,
+ AMDGPU_GFX_TA_MEM,
+ AMDGPU_GFX_TCC_MEM,
+ AMDGPU_GFX_TCA_MEM,
+ AMDGPU_GFX_TCI_MEM,
+ AMDGPU_GFX_TCP_MEM,
+ AMDGPU_GFX_TD_MEM,
+ AMDGPU_GFX_TCX_MEM,
+ AMDGPU_GFX_ATC_L2_MEM,
+ AMDGPU_GFX_UTCL2_MEM,
+ AMDGPU_GFX_VML2_MEM,
+ AMDGPU_GFX_VML2_WALKER_MEM,
+ AMDGPU_GFX_MEM_TYPE_NUM
+};
+
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
+ struct amdgpu_bo *mec_fw_data_obj;
+ u64 mec_fw_data_gpu_addr;
+
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
- void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+ void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
+};
+struct amdgpu_mec_bitmap {
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
@@ -94,6 +142,10 @@ struct kiq_pm4_funcs {
void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub);
+ void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring,
+ uint32_t queue_type, uint32_t me_id,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid);
/* Packet sizes */
int set_resources_size;
int map_queues_size;
@@ -109,15 +161,7 @@ struct amdgpu_kiq {
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf;
-};
-
-/*
- * GPU scratch registers structures, functions & helpers
- */
-struct amdgpu_scratch {
- unsigned num_reg;
- uint32_t reg_base;
- uint32_t free_mask;
+ void *mqd_backup;
};
/*
@@ -126,10 +170,46 @@ struct amdgpu_scratch {
#define AMDGPU_GFX_MAX_SE 4
#define AMDGPU_GFX_MAX_SH_PER_SE 2
+/**
+ * amdgpu_rb_config - Configure a single Render Backend (RB)
+ *
+ * Bad RBs are fused off and there is a harvest register the driver reads to
+ * determine which RB(s) are fused off so that the driver can configure the
+ * hardware state so that nothing gets sent to them. There are also user
+ * harvest registers that the driver can program to disable additional RBs,
+ * etc., for testing purposes.
+ */
struct amdgpu_rb_config {
+ /**
+ * @rb_backend_disable:
+ *
+ * The value captured from register RB_BACKEND_DISABLE indicates if the
+ * RB backend is disabled or not.
+ */
uint32_t rb_backend_disable;
+
+ /**
+ * @user_rb_backend_disable:
+ *
+ * The value captured from register USER_RB_BACKEND_DISABLE indicates
+ * if the User RB backend is disabled or not.
+ */
uint32_t user_rb_backend_disable;
+
+ /**
+ * @raster_config:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the first register.
+ */
uint32_t raster_config;
+
+ /**
+ * @raster_config_1:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the second register.
+ */
uint32_t raster_config_1;
};
@@ -177,6 +257,13 @@ struct amdgpu_gfx_config {
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
+
+ /**
+ * @rb_config:
+ *
+ * Matrix that keeps all the Render Backend (color and depth buffer
+ * handling) configuration on the 3D engine.
+ */
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
@@ -187,7 +274,29 @@ struct amdgpu_gfx_config {
uint32_t num_sc_per_sh;
uint32_t num_packer_per_sc;
uint32_t pa_sc_tile_steering_override;
+ /* Whether texture coordinate truncation is conformant. */
+ bool ta_cntl2_truncate_coord_mode;
uint64_t tcc_disabled_mask;
+ uint32_t gc_num_tcp_per_sa;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_tcps;
+ uint32_t gc_num_tcp_per_wpg;
+ uint32_t gc_tcp_l1_size;
+ uint32_t gc_num_sqc_per_wgp;
+ uint32_t gc_l1_instruction_cache_size_per_sqc;
+ uint32_t gc_l1_data_cache_size_per_sqc;
+ uint32_t gc_gl1c_per_sa;
+ uint32_t gc_gl1c_size_per_instance;
+ uint32_t gc_gl2c_per_gpu;
+ uint32_t gc_tcp_size_per_cu;
+ uint32_t gc_num_cu_per_sqc;
+ uint32_t gc_tcc_size;
+ uint32_t gc_tcp_cache_line_size;
+ uint32_t gc_instruction_cache_size_per_sqc;
+ uint32_t gc_instruction_cache_line_size;
+ uint32_t gc_scalar_data_cache_size_per_sqc;
+ uint32_t gc_scalar_data_cache_line_size;
+ uint32_t gc_tcc_cache_line_size;
};
struct amdgpu_cu_info {
@@ -201,30 +310,52 @@ struct amdgpu_cu_info {
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
- uint32_t bitmap[4][4];
+ uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
+};
+
+struct amdgpu_gfx_ras {
+ struct amdgpu_ras_block_object ras_block;
+ void (*enable_watchdog_timer)(struct amdgpu_device *adev);
+ int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+ int (*poison_consumption_handler)(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+};
+
+struct amdgpu_gfx_shadow_info {
+ u32 shadow_size;
+ u32 shadow_alignment;
+ u32 csa_size;
+ u32 csa_alignment;
};
struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance);
- void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
+ u32 sh_num, u32 instance, int xcc_id);
+ void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t *dst, int *no_fields);
- void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
+ void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread, uint32_t start,
uint32_t size, uint32_t *dst);
- void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd,
+ void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start, uint32_t size,
uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
- u32 queue, u32 vmid);
- int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
- int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
- void (*reset_ras_error_count) (struct amdgpu_device *adev);
+ u32 queue, u32 vmid, u32 xcc_id);
void (*init_spm_golden)(struct amdgpu_device *adev);
- void (*query_ras_error_status) (struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
+ int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check);
+ enum amdgpu_gfx_partition
+ (*query_partition_mode)(struct amdgpu_device *adev);
+ int (*switch_partition_mode)(struct amdgpu_device *adev,
+ int num_xccs_per_xcp);
+ int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+ int (*get_xccs_per_xcp)(struct amdgpu_device *adev);
};
struct sq_work {
@@ -236,6 +367,10 @@ struct amdgpu_pfp {
struct amdgpu_bo *pfp_fw_obj;
uint64_t pfp_fw_gpu_addr;
uint32_t *pfp_fw_ptr;
+
+ struct amdgpu_bo *pfp_fw_data_obj;
+ uint64_t pfp_fw_data_gpu_addr;
+ uint32_t *pfp_fw_data_ptr;
};
struct amdgpu_ce {
@@ -248,6 +383,11 @@ struct amdgpu_me {
struct amdgpu_bo *me_fw_obj;
uint64_t me_fw_gpu_addr;
uint32_t *me_fw_ptr;
+
+ struct amdgpu_bo *me_fw_data_obj;
+ uint64_t me_fw_data_gpu_addr;
+ uint32_t *me_fw_data_ptr;
+
uint32_t num_me;
uint32_t num_pipe_per_me;
uint32_t num_queue_per_pipe;
@@ -257,6 +397,12 @@ struct amdgpu_me {
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
};
+struct amdgpu_isolation_work {
+ struct amdgpu_device *adev;
+ u32 xcp_id;
+ struct delayed_work work;
+};
+
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
@@ -265,8 +411,10 @@ struct amdgpu_gfx {
struct amdgpu_ce ce;
struct amdgpu_me me;
struct amdgpu_mec mec;
- struct amdgpu_kiq kiq;
- struct amdgpu_scratch scratch;
+ struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_imu imu;
+ bool rs64_enable; /* firmware format */
const struct firmware *me_fw; /* ME firmware */
uint32_t me_fw_version;
const struct firmware *pfp_fw; /* PFP firmware */
@@ -279,6 +427,8 @@ struct amdgpu_gfx {
uint32_t mec_fw_version;
const struct firmware *mec2_fw; /* MEC2 firmware */
uint32_t mec2_fw_version;
+ const struct firmware *imu_fw; /* IMU firmware */
+ uint32_t imu_fw_version;
uint32_t me_feature_version;
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
@@ -289,6 +439,10 @@ struct amdgpu_gfx {
uint32_t rlc_srlg_feature_version;
uint32_t rlc_srls_fw_version;
uint32_t rlc_srls_feature_version;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
@@ -296,13 +450,15 @@ struct amdgpu_gfx {
bool cp_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
- struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+ struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
+ struct amdgpu_irq_src bad_op_irq;
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
+ struct amdgpu_irq_src rlc_gc_fed_irq;
struct sq_work sq_work;
/* gfx status */
@@ -315,12 +471,16 @@ struct amdgpu_gfx {
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
+ uint32_t gfx_supported_reset;
+ uint32_t compute_supported_reset;
/* gfx off */
- bool gfx_off_state; /* true: enabled, false: disabled */
- struct mutex gfx_off_mutex;
- uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
- struct delayed_work gfx_off_delay_work;
+ bool gfx_off_state; /* true: enabled, false: disabled */
+ struct mutex gfx_off_mutex; /* mutex to change gfxoff state */
+ uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
+ struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */
+ uint32_t gfx_off_residency; /* last logged residency */
+ uint64_t gfx_off_entrycount; /* count of times GPU has get into GFXOFF state */
/* pipe reservation */
struct mutex pipe_reserve_mutex;
@@ -328,12 +488,69 @@ struct amdgpu_gfx {
/*ras */
struct ras_common_if *ras_if;
+ struct amdgpu_gfx_ras *ras;
+
+ bool is_poweron;
+
+ struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
+ struct amdgpu_ring_mux muxer;
+
+ bool cp_gfx_shadow; /* for gfx11 */
+
+ uint16_t xcc_mask;
+ uint32_t num_xcc_per_xcp;
+ struct mutex partition_mutex;
+ bool mcbp; /* mid command buffer preemption */
+
+ /* IP reg dump */
+ uint32_t *ip_dump_core;
+ uint32_t *ip_dump_compute_queues;
+ uint32_t *ip_dump_gfx_queues;
+
+ struct mutex reset_sem_mutex;
+
+ /* cleaner shader */
+ struct amdgpu_bo *cleaner_shader_obj;
+ unsigned int cleaner_shader_size;
+ u64 cleaner_shader_gpu_addr;
+ void *cleaner_shader_cpu_ptr;
+ const void *cleaner_shader_ptr;
+ bool enable_cleaner_shader;
+ struct amdgpu_isolation_work enforce_isolation[MAX_XCP];
+ /* Mutex for synchronizing KFD scheduler operations */
+ struct mutex userq_sch_mutex;
+ u64 userq_sch_req_count[MAX_XCP];
+ bool userq_sch_inactive[MAX_XCP];
+ unsigned long enforce_isolation_jiffies[MAX_XCP];
+ unsigned long enforce_isolation_time[MAX_XCP];
+
+ atomic_t total_submission_cnt;
+ struct delayed_work idle_work;
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+
+ bool disable_kq;
+ bool disable_uq;
};
+struct amdgpu_gfx_ras_reg_entry {
+ struct amdgpu_ras_err_status_reg_entry reg_entry;
+ enum amdgpu_gfx_ras_mem_id_type mem_id_type;
+ uint32_t se_num;
+};
+
+struct amdgpu_gfx_ras_mem_id_entry {
+ const struct amdgpu_ras_memory_id_entry *mem_id_ent;
+ uint32_t size;
+};
+
+#define AMDGPU_GFX_MEMID_ENT(x) {(x), ARRAY_SIZE(x)},
+
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
-#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
-#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
+#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false))
/**
* amdgpu_gfx_create_bitmask - create a bitmask
@@ -348,27 +565,24 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
return (u32)((1ULL << bit_width) - 1);
}
-int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
-void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
-
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
unsigned max_sh);
-int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq);
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id);
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
- unsigned hpd_size);
+ unsigned hpd_size, int xcc_id);
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size);
-void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev);
-int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev);
-int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev);
+ unsigned mqd_size, int xcc_id);
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
@@ -377,28 +591,81 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue);
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue);
-bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
- int pipe, int queue);
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int xcc_id,
+ int mec, int pipe, int queue);
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
-int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
- int pipe, int queue);
-void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
- int *me, int *pipe, int *queue);
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable);
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value);
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency);
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value);
int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
-uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
-void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
-void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state);
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
+
+int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id));
+int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size);
+void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size,
+ const void *cleaner_shader_ptr);
+void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work);
+void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
+
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work);
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring);
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
+
+static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
+{
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return "SPX";
+ case AMDGPU_DPX_PARTITION_MODE:
+ return "DPX";
+ case AMDGPU_TPX_PARTITION_MODE:
+ return "TPX";
+ case AMDGPU_QPX_PARTITION_MODE:
+ return "QPX";
+ case AMDGPU_CPX_PARTITION_MODE:
+ return "CPX";
+ default:
+ return "UNKNOWN";
+ }
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
index 66ebc2e3b2ad..c7b44aeb671b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
@@ -34,6 +34,10 @@ struct amdgpu_gfxhub_funcs {
void (*set_fault_enable_default)(struct amdgpu_device *adev, bool value);
void (*init)(struct amdgpu_device *adev);
int (*get_xgmi_info)(struct amdgpu_device *adev);
+ void (*utcl2_harvest)(struct amdgpu_device *adev);
+ void (*mode2_save_regs)(struct amdgpu_device *adev);
+ void (*mode2_restore_regs)(struct amdgpu_device *adev);
+ void (*halt)(struct amdgpu_device *adev);
};
struct amdgpu_gfxhub {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index fe1a39ffda72..869bceb0fe2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -25,12 +25,79 @@
*/
#include <linux/io-64-nonatomic-lo-hi.h>
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
#include "amdgpu.h"
#include "amdgpu_gmc.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
#include "amdgpu_xgmi.h"
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+
+static const u64 four_gb = 0x100000000ULL;
+
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev)
+{
+ return adev->gmc.xgmi.connected_to_cpu || amdgpu_virt_xgmi_migrate_enabled(adev);
+}
+
+/**
+ * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate video memory for pdb0 and map it for CPU access
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
+{
+ int r;
+ struct amdgpu_bo_param bp;
+ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
+ uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = PAGE_ALIGN((npdes + 1) * 8);
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
+ if (unlikely(r != 0))
+ goto bo_reserve_failure;
+
+ r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto bo_pin_failure;
+ r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
+ if (r)
+ goto bo_kmap_failure;
+
+ amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+ return 0;
+
+bo_kmap_failure:
+ amdgpu_bo_unpin(adev->gmc.pdb0_bo);
+bo_pin_failure:
+ amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+bo_reserve_failure:
+ amdgpu_bo_unref(&adev->gmc.pdb0_bo);
+ return r;
+}
+
/**
* amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
*
@@ -46,7 +113,7 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- switch (bo->tbo.mem.mem_type) {
+ switch (bo->tbo.resource->mem_type) {
case TTM_PL_TT:
*addr = bo->tbo.ttm->dma_address[0];
break;
@@ -57,7 +124,7 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
*addr = 0;
break;
}
- *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
+ *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, bo->tbo.resource);
amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}
@@ -105,6 +172,7 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
value = addr & 0x0000FFFFFFFFF000ULL;
value |= flags;
writeq(value, ptr + (gpu_page_idx * 8));
+
return 0;
}
@@ -120,6 +188,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ if (!bo->ttm)
+ return AMDGPU_BO_INVALID_OFFSET;
+
if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
return AMDGPU_BO_INVALID_OFFSET;
@@ -142,13 +213,20 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base)
{
+ uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20;
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
mc->vram_start = base;
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
- if (limit && limit < mc->real_vram_size)
+ if (limit < mc->real_vram_size)
mc->real_vram_size = limit;
+ if (vis_limit && vis_limit < mc->visible_vram_size)
+ mc->visible_vram_size = vis_limit;
+
+ if (mc->real_vram_size < mc->visible_vram_size)
+ mc->visible_vram_size = mc->real_vram_size;
+
if (mc->xgmi.num_physical_nodes == 0) {
mc->fb_start = mc->vram_start;
mc->fb_end = mc->vram_end;
@@ -158,26 +236,67 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
mc->vram_end, mc->real_vram_size >> 20);
}
+/** amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * This function is only used if use GART for FB translation. In such
+ * case, we use sysvm aperture (vmid0 page tables) for both vram
+ * and gart (aka system memory) access.
+ *
+ * GPUVM (and our organization of vmid0 page tables) require sysvm
+ * aperture to be placed at a location aligned with 8 times of native
+ * page size. For example, if vm_context0_cntl.page_table_block_size
+ * is 12, then native page size is 8G (2M*2^12), sysvm should start
+ * with a 64G aligned address. For simplicity, we just put sysvm at
+ * address 0. So vram start at address 0 and gart is right after vram.
+ */
+void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ u64 hive_vram_start = 0;
+ u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
+ mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
+ mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
+ /* node_segment_size may not 4GB aligned on SRIOV, align up is needed. */
+ mc->gart_start = ALIGN(hive_vram_end + 1, four_gb);
+ mc->gart_end = mc->gart_start + mc->gart_size - 1;
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ /* set mc->vram_start to 0 to switch the returned GPU address of
+ * amdgpu_bo_create_reserved() from FB aperture to GART aperture.
+ */
+ mc->vram_start = 0;
+ mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+ mc->visible_vram_size = min(mc->visible_vram_size, mc->real_vram_size);
+ } else {
+ mc->fb_start = hive_vram_start;
+ mc->fb_end = hive_vram_end;
+ }
+ dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+ mc->mc_vram_size >> 20, mc->vram_start,
+ mc->vram_end, mc->real_vram_size >> 20);
+ dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+ mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
/**
* amdgpu_gmc_gart_location - try to find GART location
*
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
+ * @gart_placement: GART placement policy with respect to VRAM
*
- * Function will place try to place GART before or after VRAM.
- *
+ * Function will try to place GART before or after VRAM.
* If GART size is bigger than space left then we ajust GART size.
* Thus function will never fails.
*/
-void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ enum amdgpu_gart_placement gart_placement)
{
- const uint64_t four_gb = 0x100000000ULL;
u64 size_af, size_bf;
/*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
- mc->gart_size += adev->pm.smu_prv_buffer_size;
-
/* VCE doesn't like it when BOs cross a 4GB segment, so align
* the GART base on a 4GB boundary as well.
*/
@@ -189,11 +308,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
mc->gart_size = max(size_bf, size_af);
}
- if ((size_bf >= mc->gart_size && size_bf < size_af) ||
- (size_af < mc->gart_size))
- mc->gart_start = 0;
- else
+ switch (gart_placement) {
+ case AMDGPU_GART_PLACEMENT_HIGH:
mc->gart_start = max_mc_address - mc->gart_size + 1;
+ break;
+ case AMDGPU_GART_PLACEMENT_LOW:
+ mc->gart_start = 0;
+ break;
+ case AMDGPU_GART_PLACEMENT_BEST_FIT:
+ default:
+ if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+ (size_af < mc->gart_size))
+ mc->gart_start = 0;
+ else
+ mc->gart_start = max_mc_address - mc->gart_size + 1;
+ break;
+ }
mc->gart_start &= ~(four_gb - 1);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
@@ -218,14 +348,6 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
- if (amdgpu_sriov_vf(adev)) {
- mc->agp_start = 0xffffffffffff;
- mc->agp_end = 0x0;
- mc->agp_size = 0;
-
- return;
- }
-
if (mc->fb_start > mc->gart_start) {
size_bf = (mc->fb_start & sixteen_gb_mask) -
ALIGN(mc->gart_end + 1, sixteen_gb);
@@ -250,9 +372,40 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
}
/**
+ * amdgpu_gmc_set_agp_default - Set the default AGP aperture value.
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * To disable the AGP aperture, you need to set the start to a larger
+ * value than the end. This function sets the default value which
+ * can then be overridden using amdgpu_gmc_agp_location() if you want
+ * to enable the AGP aperture on a specific chip.
+ *
+ */
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ mc->agp_start = 0xffffffffffff;
+ mc->agp_end = 0;
+ mc->agp_size = 0;
+}
+
+/**
+ * amdgpu_gmc_fault_key - get hask key from vm fault address and pasid
+ *
+ * @addr: 48 bit physical address, page aligned (36 significant bits)
+ * @pasid: 16 bit process address space identifier
+ */
+static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
+{
+ return addr << 4 | pasid;
+}
+
+/**
* amdgpu_gmc_filter_faults - filter VM faults
*
* @adev: amdgpu device structure
+ * @ih: interrupt ring that the fault received from
* @addr: address of the VM fault
* @pasid: PASID of the process causing the fault
* @timestamp: timestamp of the fault
@@ -261,15 +414,19 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
* True if the fault was filtered and should not be processed further.
* False if the fault is a new one and needs to be handled.
*/
-bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
uint16_t pasid, uint64_t timestamp)
{
struct amdgpu_gmc *gmc = &adev->gmc;
-
- uint64_t stamp, key = addr << 4 | pasid;
+ uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
struct amdgpu_gmc_fault *fault;
uint32_t hash;
+ /* Stale retry fault if timestamp goes backward */
+ if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
+ return true;
+
/* If we don't have space left in the ring buffer return immediately */
stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
AMDGPU_GMC_FAULT_TIMEOUT;
@@ -282,8 +439,21 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
while (fault->timestamp >= stamp) {
uint64_t tmp;
- if (fault->key == key)
- return true;
+ if (atomic64_read(&fault->key) == key) {
+ /*
+ * if we get a fault which is already present in
+ * the fault_ring and the timestamp of
+ * the fault is after the expired timestamp,
+ * then this is a new fault that needs to be added
+ * into the fault ring.
+ */
+ if (fault->timestamp_expiry != 0 &&
+ amdgpu_ih_ts_after(fault->timestamp_expiry,
+ timestamp))
+ break;
+ else
+ return true;
+ }
tmp = fault->timestamp;
fault = &gmc->fault_ring[fault->next];
@@ -295,7 +465,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
/* Add the fault to the ring */
fault = &gmc->fault_ring[gmc->last_fault];
- fault->key = key;
+ atomic64_set(&fault->key, key);
fault->timestamp = timestamp;
/* And update the hash */
@@ -304,30 +474,104 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
return false;
}
-int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+/**
+ * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ *
+ * Remove the address from fault filter, then future vm fault on this address
+ * will pass to retry fault handler to recover.
+ */
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+ struct amdgpu_ih_ring *ih;
+ struct amdgpu_gmc_fault *fault;
+ uint32_t last_wptr;
+ uint64_t last_ts;
+ uint32_t hash;
+ uint64_t tmp;
+
+ if (adev->irq.retry_cam_enabled)
+ return;
+
+ ih = &adev->irq.ih1;
+ /* Get the WPTR of the last entry in IH ring */
+ last_wptr = amdgpu_ih_get_wptr(adev, ih);
+ /* Order wptr with ring data. */
+ rmb();
+ /* Get the timetamp of the last entry in IH ring */
+ last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1);
+
+ hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+ fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+ do {
+ if (atomic64_read(&fault->key) == key) {
+ /*
+ * Update the timestamp when this fault
+ * expired.
+ */
+ fault->timestamp_expiry = last_ts;
+ break;
+ }
+
+ tmp = fault->timestamp;
+ fault = &gmc->fault_ring[fault->next];
+ } while (fault->timestamp < tmp);
+}
+
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
{
int r;
- if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
- r = adev->umc.funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ /* umc ras block */
+ r = amdgpu_umc_ras_sw_init(adev);
+ if (r)
+ return r;
- if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
- r = adev->mmhub.funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ /* mmhub ras block */
+ r = amdgpu_mmhub_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* hdp ras block */
+ r = amdgpu_hdp_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* mca.x ras block */
+ r = amdgpu_mca_mp0_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_mca_mp1_ras_sw_init(adev);
+ if (r)
+ return r;
- return amdgpu_xgmi_ras_late_init(adev);
+ r = amdgpu_mca_mpio_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* xgmi ras block */
+ r = amdgpu_xgmi_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+{
+ return 0;
}
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
- amdgpu_umc_ras_fini(adev);
- amdgpu_mmhub_ras_fini(adev);
- amdgpu_xgmi_ras_fini(adev);
+
}
/*
@@ -337,23 +581,42 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
* subject to change when ring number changes
* Engine 17: Gart flushes
*/
-#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
-#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+#define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
- {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
- GFXHUB_FREE_VM_INV_ENGS_BITMAP};
+ unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
unsigned i;
unsigned vmhub, inv_eng;
+ struct amdgpu_ring *shared_ring;
+
+ /* init the vm inv eng for all vmhubs */
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
+ /* reserve engine 5 for firmware */
+ if (adev->enable_mes)
+ vm_inv_engs[i] &= ~(1 << 5);
+ /* reserve engine 6 for uni mes */
+ if (adev->enable_uni_mes)
+ vm_inv_engs[i] &= ~(1 << 6);
+ /* reserve mmhub engine 3 for firmware */
+ if (adev->enable_umsch_mm)
+ vm_inv_engs[i] &= ~(1 << 3);
+ }
for (i = 0; i < adev->num_rings; ++i) {
ring = adev->rings[i];
- vmhub = ring->funcs->vmhub;
+ vmhub = ring->vm_hub;
- if (ring == &adev->mes.ring)
+ if (ring == &adev->mes.ring[0] ||
+ ring == &adev->mes.ring[1] ||
+ ring == &adev->umsch_mm.ring ||
+ ring == &adev->cper.ring_buf)
+ continue;
+
+ /* Skip if the ring is a shared ring */
+ if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
continue;
inv_eng = ffs(vm_inv_engs[vmhub]);
@@ -367,14 +630,232 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
- ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
+ ring->name, ring->vm_inv_eng, ring->vm_hub);
+ /* SDMA has a special packet which allows it to use the same
+ * invalidation engine for all the rings in one instance.
+ * Therefore, we do not allocate a separate VM invalidation engine
+ * for SDMA page rings. Instead, they share the VM invalidation
+ * engine with the SDMA gfx ring. This change ensures efficient
+ * resource management and avoids the issue of insufficient VM
+ * invalidation engines.
+ */
+ shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
+ if (shared_ring) {
+ shared_ring->vm_inv_eng = ring->vm_inv_eng;
+ dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
+ ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
+ continue;
+ }
}
return 0;
}
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ struct dma_fence *fence;
+ struct amdgpu_job *job;
+ int r;
+
+ if (!hub->sdma_invalidation_workaround || vmid ||
+ !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
+ !ring->sched.ready) {
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+ vmhub, 2);
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+ vmhub, 0);
+
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
+ flush_type);
+ up_read(&adev->reset_domain->sem);
+ return;
+ }
+
+ /* The SDMA on Navi 1x has a bug which can theoretically result in memory
+ * corruption if an invalidation happens at the same time as an VA
+ * translation. Avoid this by doing the invalidation from the SDMA
+ * itself at least for GART.
+ */
+ mutex_lock(&adev->mman.gtt_window_lock);
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+ &job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB);
+ if (r)
+ goto error_alloc;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+ job->vm_needs_flush = true;
+ job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ return;
+
+error_alloc:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst)
+{
+ struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+ unsigned int ndw;
+ int r, cnt = 0;
+ uint32_t seq;
+
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return 0;
+
+ if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ 2, all_hub,
+ inst);
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ 0, all_hub,
+ inst);
+
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ flush_type, all_hub,
+ inst);
+ r = 0;
+ } else {
+ /* 2 dwords flush + 8 dwords fence */
+ ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
+ r = amdgpu_ring_alloc(ring, ndw);
+ if (r) {
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+
+ if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+ !amdgpu_reset_pending(adev->reset_domain)) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "timeout waiting for kiq fence\n");
+ r = -ETIME;
+ } else
+ r = 0;
+ }
+
+error_unlock_reset:
+ up_read(&adev->reset_domain->sem);
+ return r;
+}
+
+void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_inst)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
+ struct amdgpu_ring *ring = &kiq->ring;
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+
+ if (adev->mes.ring[0].sched.ready) {
+ amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
+ ref, mask);
+ return;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+ ref, mask);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for IRQ context */
+ if (r < 1 && in_interrupt())
+ goto failed_kiq;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+ !amdgpu_reset_pending(adev->reset_domain)) {
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq;
+
+ return;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq:
+ dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+}
+
/**
- * amdgpu_tmz_set -- check and set if a device supports TMZ
+ * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
* @adev: amdgpu_device pointer
*
* Check and set if an the device @adev supports Trusted Memory
@@ -382,13 +863,44 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
*/
void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_VANGOGH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ /* RAVEN */
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ /* RENOIR looks like RAVEN */
+ case IP_VERSION(9, 3, 0):
+ /* GC 10.3.7 */
+ case IP_VERSION(10, 3, 7):
+ /* GC 11.0.1 */
+ case IP_VERSION(11, 0, 1):
+ if (amdgpu_tmz == 0) {
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
+ } else {
+ adev->gmc.tmz_enabled = true;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature enabled\n");
+ }
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ /* VANGOGH */
+ case IP_VERSION(10, 3, 1):
+ /* YELLOW_CARP*/
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
@@ -403,14 +915,14 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
break;
default:
adev->gmc.tmz_enabled = false;
- dev_warn(adev->dev,
+ dev_info(adev->dev,
"Trusted Memory Zone (TMZ) feature not supported\n");
break;
}
}
/**
- * amdgpu_noretry_set -- set per asic noretry defaults
+ * amdgpu_gmc_noretry_set -- set per asic noretry defaults
* @adev: amdgpu_device pointer
*
* Set a per asic default for the no-retry parameter.
@@ -419,38 +931,20 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
struct amdgpu_gmc *gmc = &adev->gmc;
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+ bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
+ gc_ver == IP_VERSION(9, 4, 0) ||
+ gc_ver == IP_VERSION(9, 4, 1) ||
+ gc_ver == IP_VERSION(9, 4, 2) ||
+ gc_ver == IP_VERSION(9, 4, 3) ||
+ gc_ver == IP_VERSION(9, 4, 4) ||
+ gc_ver == IP_VERSION(9, 5, 0) ||
+ gc_ver >= IP_VERSION(10, 3, 0));
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA20:
- /*
- * noretry = 0 will cause kfd page fault tests fail
- * for some ASICs, so set default to 1 for these ASICs.
- */
- if (amdgpu_noretry == -1)
- gmc->noretry = 1;
- else
- gmc->noretry = amdgpu_noretry;
- break;
- case CHIP_RAVEN:
- default:
- /* Raven currently has issues with noretry
- * regardless of what we decide for other
- * asics, we should leave raven with
- * noretry = 0 until we root cause the
- * issues.
- *
- * default this to 0 for now, but we may want
- * to change this in the future for certain
- * GPUs as it can increase performance in
- * certain cases.
- */
- if (amdgpu_noretry == -1)
- gmc->noretry = 0;
- else
- gmc->noretry = amdgpu_noretry;
- break;
- }
+ if (!amdgpu_sriov_xnack_support(adev))
+ gmc->noretry = 1;
+ else
+ gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
}
void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
@@ -463,13 +957,18 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + hub->ctx_distance * i;
- tmp = RREG32(reg);
+ tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
+ RREG32_SOC15_IP(GC, reg) :
+ RREG32_SOC15_IP(MMHUB, reg);
+
if (enable)
tmp |= hub->vm_cntx_cntl_vm_fault;
else
tmp &= ~hub->vm_cntx_cntl_vm_fault;
- WREG32(reg, tmp);
+ (hub_type == AMDGPU_GFXHUB(0)) ?
+ WREG32_SOC15_IP(GC, reg, tmp) :
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
}
}
@@ -478,6 +977,13 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
unsigned size;
/*
+ * Some ASICs need to reserve a region of video memory to avoid access
+ * from driver
+ */
+ adev->mman.stolen_reserved_offset = 0;
+ adev->mman.stolen_reserved_size = 0;
+
+ /*
* TODO:
* Currently there is a bug where some memory client outside
* of the driver writes to first 8M of VRAM on S3 resume,
@@ -487,6 +993,17 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
*/
switch (adev->asic_type) {
case CHIP_VEGA10:
+ adev->mman.keep_stolen_vga_memory = true;
+ /*
+ * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
+ */
+#ifdef CONFIG_X86
+ if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
+ adev->mman.stolen_reserved_offset = 0x500000;
+ adev->mman.stolen_reserved_size = 0x200000;
+ }
+#endif
+ break;
case CHIP_RAVEN:
case CHIP_RENOIR:
adev->mman.keep_stolen_vga_memory = true;
@@ -497,7 +1014,7 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
}
if (amdgpu_sriov_vf(adev) ||
- !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
+ !amdgpu_device_has_display_hardware(adev)) {
size = 0;
} else {
size = amdgpu_gmc_get_vbios_fb_size(adev);
@@ -518,3 +1035,648 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
adev->mman.stolen_extended_size = 0;
}
}
+
+/**
+ * amdgpu_gmc_init_pdb0 - initialize PDB0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This function is only used when GART page table is used
+ * for FB address translatioin. In such a case, we construct
+ * a 2-level system VM page table: PDB0->PTB, to cover both
+ * VRAM of the hive and system memory.
+ *
+ * PDB0 is static, initialized once on driver initialization.
+ * The first n entries of PDB0 are used as PTE by setting
+ * P bit to 1, pointing to VRAM. The n+1'th entry points
+ * to a big PTB covering system memory.
+ *
+ */
+void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
+{
+ int i;
+ uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
+ /* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
+ */
+ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
+ u64 vram_addr, vram_end;
+ u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+ flags |= AMDGPU_PTE_WRITEABLE;
+ flags |= AMDGPU_PTE_SNOOPED;
+ flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
+ flags |= AMDGPU_PDE_PTE_FLAG(adev);
+
+ vram_addr = adev->vm_manager.vram_base_offset;
+ if (!amdgpu_virt_xgmi_migrate_enabled(adev))
+ vram_addr -= adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ vram_end = vram_addr + vram_size;
+
+ /* The first n PDE0 entries are used as PTE,
+ * pointing to vram
+ */
+ for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
+ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
+
+ /* The n+1'th PDE0 entry points to a huge
+ * PTB who has more than 512 entries each
+ * pointing to a 4K system page
+ */
+ flags = AMDGPU_PTE_VALID;
+ flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
+ /* Requires gart_ptb_gpu_pa to be 4K aligned */
+ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC
+ * address
+ *
+ * @adev: amdgpu_device pointer
+ * @mc_addr: MC address of buffer
+ */
+uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
+{
+ return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
+}
+
+/**
+ * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from
+ * GPU's view
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: amdgpu buffer object
+ */
+uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+ return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
+}
+
+int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo *vram_bo = NULL;
+ uint64_t vram_gpu = 0;
+ void *vram_ptr = NULL;
+
+ int ret, size = 0x100000;
+ uint8_t cptr[10];
+
+ ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &vram_bo,
+ &vram_gpu,
+ &vram_ptr);
+ if (ret)
+ return ret;
+
+ memset(vram_ptr, 0x86, size);
+ memset(cptr, 0x86, 10);
+
+ /**
+ * Check the start, the mid, and the end of the memory if the content of
+ * each byte is the pattern "0x86". If yes, we suppose the vram bo is
+ * workable.
+ *
+ * Note: If check the each byte of whole 1M bo, it will cost too many
+ * seconds, so here, we just pick up three parts for emulation.
+ */
+ ret = memcmp(vram_ptr, cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+ ret = memcmp(vram_ptr + (size / 2), cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+ ret = memcmp(vram_ptr + size - 10, cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+release_buffer:
+ amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
+ &vram_ptr);
+
+ return ret;
+}
+
+static const char *nps_desc[] = {
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+static ssize_t available_memory_partition_show(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int size = 0, mode;
+ char *sep = "";
+
+ for_each_inst(mode, adev->gmc.supported_nps_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+ sep = ", ";
+ }
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t current_memory_partition_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+ struct amdgpu_hive_info *hive;
+ int i;
+
+ mode = UNKNOWN_MEMORY_PARTITION_MODE;
+ for_each_inst(i, adev->gmc.supported_nps_modes) {
+ if (!strncasecmp(nps_desc[i], buf, strlen(nps_desc[i]))) {
+ mode = i;
+ break;
+ }
+ }
+
+ if (mode == UNKNOWN_MEMORY_PARTITION_MODE)
+ return -EINVAL;
+
+ if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
+ dev_info(
+ adev->dev,
+ "requested NPS mode is same as current NPS mode, skipping\n");
+ return count;
+ }
+
+ /* If device is part of hive, all devices in the hive should request the
+ * same mode. Hence store the requested mode in hive.
+ */
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ atomic_set(&hive->requested_nps_mode, mode);
+ amdgpu_put_xgmi_hive(hive);
+ } else {
+ adev->gmc.requested_nps_mode = mode;
+ }
+
+ dev_info(
+ adev->dev,
+ "NPS mode change requested, please remove and reload the driver\n");
+
+ return count;
+}
+
+static ssize_t current_memory_partition_show(
+ struct device *dev, struct device_attribute *addr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+
+ /* Only minimal precaution taken to reject requests while in reset */
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ if ((mode >= ARRAY_SIZE(nps_desc)) ||
+ (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
+ return sysfs_emit(buf, "UNKNOWN\n");
+
+ return sysfs_emit(buf, "%s\n", nps_desc[mode]);
+}
+
+static DEVICE_ATTR_RW(current_memory_partition);
+static DEVICE_ATTR_RO(available_memory_partition);
+
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
+{
+ bool nps_switch_support;
+ int r = 0;
+
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return 0;
+
+ nps_switch_support = (hweight32(adev->gmc.supported_nps_modes &
+ AMDGPU_ALL_NPS_MASK) > 1);
+ if (!nps_switch_support)
+ dev_attr_current_memory_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+ else
+ r = device_create_file(adev->dev,
+ &dev_attr_available_memory_partition);
+
+ if (r)
+ return r;
+
+ return device_create_file(adev->dev,
+ &dev_attr_current_memory_partition);
+}
+
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return;
+
+ device_remove_file(adev->dev, &dev_attr_current_memory_partition);
+ device_remove_file(adev->dev, &dev_attr_available_memory_partition);
+}
+
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges,
+ uint8_t *exp_ranges)
+{
+ struct amdgpu_gmc_memrange *ranges;
+ int range_cnt, ret, i, j;
+ uint32_t nps_type;
+ bool refresh;
+
+ if (!mem_ranges || !exp_ranges)
+ return -EINVAL;
+
+ refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
+ (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
+ ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
+ &range_cnt, refresh);
+
+ if (ret)
+ return ret;
+
+ /* TODO: For now, expect ranges and partition count to be the same.
+ * Adjust if there are holes expected in any NPS domain.
+ */
+ if (*exp_ranges && (range_cnt != *exp_ranges)) {
+ dev_warn(
+ adev->dev,
+ "NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
+ *exp_ranges, nps_type, range_cnt);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ for (i = 0; i < range_cnt; ++i) {
+ if (ranges[i].base_address >= ranges[i].limit_address) {
+ dev_warn(
+ adev->dev,
+ "Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx",
+ nps_type, i, ranges[i].base_address,
+ ranges[i].limit_address);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* Check for overlaps, not expecting any now */
+ for (j = i - 1; j >= 0; j--) {
+ if (max(ranges[j].base_address,
+ ranges[i].base_address) <=
+ min(ranges[j].limit_address,
+ ranges[i].limit_address)) {
+ dev_warn(
+ adev->dev,
+ "overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]",
+ ranges[j].base_address,
+ ranges[j].limit_address,
+ ranges[i].base_address,
+ ranges[i].limit_address);
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ mem_ranges[i].range.fpfn =
+ (ranges[i].base_address -
+ adev->vm_manager.vram_base_offset) >>
+ AMDGPU_GPU_PAGE_SHIFT;
+ mem_ranges[i].range.lpfn =
+ (ranges[i].limit_address -
+ adev->vm_manager.vram_base_offset) >>
+ AMDGPU_GPU_PAGE_SHIFT;
+ mem_ranges[i].size =
+ ranges[i].limit_address - ranges[i].base_address + 1;
+ }
+
+ if (!*exp_ranges)
+ *exp_ranges = range_cnt;
+err:
+ kfree(ranges);
+
+ return ret;
+}
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode)
+{
+ /* Not supported on VF devices and APUs */
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return -EOPNOTSUPP;
+
+ if (!adev->psp.funcs) {
+ dev_err(adev->dev,
+ "PSP interface not available for nps mode change request");
+ return -EINVAL;
+ }
+
+ return psp_memory_partition(&adev->psp, nps_mode);
+}
+
+static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
+ int req_nps_mode,
+ int cur_nps_mode)
+{
+ return (((BIT(req_nps_mode) & adev->gmc.supported_nps_modes) ==
+ BIT(req_nps_mode)) &&
+ req_nps_mode != cur_nps_mode);
+}
+
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
+{
+ int req_nps_mode, cur_nps_mode, r;
+ struct amdgpu_hive_info *hive;
+
+ if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
+ !adev->gmc.gmc_funcs->request_mem_partition_mode)
+ return;
+
+ cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ req_nps_mode = atomic_read(&hive->requested_nps_mode);
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
+ cur_nps_mode)) {
+ amdgpu_put_xgmi_hive(hive);
+ return;
+ }
+ r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
+ amdgpu_put_xgmi_hive(hive);
+ goto out;
+ }
+
+ req_nps_mode = adev->gmc.requested_nps_mode;
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
+ return;
+
+ /* even if this fails, we should let driver unload w/o blocking */
+ r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
+out:
+ if (r)
+ dev_err(adev->dev, "NPS mode change request failed\n");
+ else
+ dev_info(
+ adev->dev,
+ "NPS mode change request done, reload driver to complete the change\n");
+}
+
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
+{
+ if (adev->gmc.gmc_funcs->need_reset_on_init)
+ return adev->gmc.gmc_funcs->need_reset_on_init(adev);
+
+ return false;
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev)
+{
+ switch (adev->gmc.num_mem_partitions) {
+ case 0:
+ return UNKNOWN_MEMORY_PARTITION_MODE;
+ case 1:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ case 2:
+ return AMDGPU_NPS2_PARTITION_MODE;
+ case 4:
+ return AMDGPU_NPS4_PARTITION_MODE;
+ case 8:
+ return AMDGPU_NPS8_PARTITION_MODE;
+ default:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ }
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+{
+ enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_memory_partition_mode)
+ mode = adev->nbio.funcs->get_memory_partition_mode(adev,
+ supp_modes);
+ else
+ dev_warn(adev->dev, "memory partition mode query is not supported\n");
+
+ return mode;
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return amdgpu_gmc_get_vf_memory_partition(adev);
+ else
+ return amdgpu_gmc_get_memory_partition(adev, NULL);
+}
+
+static bool amdgpu_gmc_validate_partition_info(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ u32 supp_modes;
+ bool valid;
+
+ mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware not present in supported modes */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
+ !(BIT(mode - 1) & supp_modes))
+ return false;
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 1);
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 2);
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 3 ||
+ adev->gmc.num_mem_partitions == 4);
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 8);
+ break;
+ default:
+ valid = false;
+ }
+
+ return valid;
+}
+
+static bool amdgpu_gmc_is_node_present(int *node_ids, int num_ids, int nid)
+{
+ int i;
+
+ /* Check if node with id 'nid' is present in 'node_ids' array */
+ for (i = 0; i < num_ids; ++i)
+ if (node_ids[i] == nid)
+ return true;
+
+ return false;
+}
+
+static void
+amdgpu_gmc_init_acpi_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ struct amdgpu_numa_info numa_info;
+ int node_ids[AMDGPU_MAX_MEM_RANGES];
+ int num_ranges = 0, ret;
+ int num_xcc, xcc_id;
+ uint32_t xcc_mask;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ xcc_mask = (1U << num_xcc) - 1;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+ if (ret)
+ continue;
+
+ if (numa_info.nid == NUMA_NO_NODE) {
+ mem_ranges[0].size = numa_info.size;
+ mem_ranges[0].numa.node = numa_info.nid;
+ num_ranges = 1;
+ break;
+ }
+
+ if (amdgpu_gmc_is_node_present(node_ids, num_ranges,
+ numa_info.nid))
+ continue;
+
+ node_ids[num_ranges] = numa_info.nid;
+ mem_ranges[num_ranges].numa.node = numa_info.nid;
+ mem_ranges[num_ranges].size = numa_info.size;
+ ++num_ranges;
+ }
+
+ adev->gmc.num_mem_partitions = num_ranges;
+}
+
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ enum amdgpu_memory_partition mode;
+ u32 start_addr = 0, size;
+ int i, r, l;
+
+ mode = amdgpu_gmc_query_memory_partition(adev);
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 0;
+ break;
+ case AMDGPU_NPS1_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 2;
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ if (adev->flags & AMD_IS_APU)
+ adev->gmc.num_mem_partitions = 3;
+ else
+ adev->gmc.num_mem_partitions = 4;
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 8;
+ break;
+ default:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ }
+
+ /* Use NPS range info, if populated */
+ r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
+ &adev->gmc.num_mem_partitions);
+ if (!r) {
+ l = 0;
+ for (i = 1; i < adev->gmc.num_mem_partitions; ++i) {
+ if (mem_ranges[i].range.lpfn >
+ mem_ranges[i - 1].range.lpfn)
+ l = i;
+ }
+
+ } else {
+ if (!adev->gmc.num_mem_partitions) {
+ dev_warn(adev->dev,
+ "Not able to detect NPS mode, fall back to NPS1\n");
+ adev->gmc.num_mem_partitions = 1;
+ }
+ /* Fallback to sw based calculation */
+ size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
+ size /= adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ mem_ranges[i].range.fpfn = start_addr;
+ mem_ranges[i].size =
+ ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
+ mem_ranges[i].range.lpfn = start_addr + size - 1;
+ start_addr += size;
+ }
+
+ l = adev->gmc.num_mem_partitions - 1;
+ }
+
+ /* Adjust the last one */
+ mem_ranges[l].range.lpfn =
+ (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
+ mem_ranges[l].size =
+ adev->gmc.real_vram_size -
+ ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT);
+}
+
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev)
+{
+ bool valid;
+
+ adev->gmc.mem_partitions = kcalloc(AMDGPU_MAX_MEM_RANGES,
+ sizeof(struct amdgpu_mem_partition_info),
+ GFP_KERNEL);
+ if (!adev->gmc.mem_partitions)
+ return -ENOMEM;
+
+ if (adev->gmc.is_app_apu)
+ amdgpu_gmc_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
+ else
+ amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+
+ if (amdgpu_sriov_vf(adev))
+ valid = true;
+ else
+ valid = amdgpu_gmc_validate_partition_info(adev);
+ if (!valid) {
+ /* TODO: handle invalid case */
+ dev_warn(adev->dev,
+ "Mem ranges not matching with hardware config\n");
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index aa0c83776ce0..727342689d4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -29,6 +29,8 @@
#include <linux/types.h>
#include "amdgpu_irq.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_ras.h"
/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
@@ -60,15 +62,43 @@
*/
#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL
+/* XNACK flags */
+#define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0)
+
struct firmware;
+enum amdgpu_memory_partition {
+ UNKNOWN_MEMORY_PARTITION_MODE = 0,
+ AMDGPU_NPS1_PARTITION_MODE = 1,
+ AMDGPU_NPS2_PARTITION_MODE = 2,
+ AMDGPU_NPS3_PARTITION_MODE = 3,
+ AMDGPU_NPS4_PARTITION_MODE = 4,
+ AMDGPU_NPS6_PARTITION_MODE = 6,
+ AMDGPU_NPS8_PARTITION_MODE = 8,
+};
+
+#define AMDGPU_ALL_NPS_MASK \
+ (BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS2_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS3_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS6_PARTITION_MODE) | BIT(AMDGPU_NPS8_PARTITION_MODE))
+
+#define AMDGPU_GMC_INIT_RESET_NPS BIT(0)
+
+#define AMDGPU_MAX_MEM_RANGES 8
+
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY 0x80
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_READ 0x40
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE 0x20
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_EXE 0x10
+
/*
* GMC page fault information
*/
struct amdgpu_gmc_fault {
- uint64_t timestamp;
+ uint64_t timestamp:48;
uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
- uint64_t key:52;
+ atomic64_t key;
+ uint64_t timestamp_expiry:48;
};
/*
@@ -99,7 +129,13 @@ struct amdgpu_vmhub {
uint32_t eng_distance;
uint32_t eng_addr_distance; /* include LO32/HI32 */
+ uint32_t vm_cntx_cntl;
uint32_t vm_cntx_cntl_vm_fault;
+ uint32_t vm_l2_bank_select_reserved_cid2;
+
+ uint32_t vm_contexts_disable;
+
+ bool sdma_invalidation_workaround;
const struct amdgpu_vmhub_funcs *vmhub_funcs;
};
@@ -112,8 +148,9 @@ struct amdgpu_gmc_funcs {
void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type);
/* flush the vm tlb via pasid */
- int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
- uint32_t flush_type, bool all_hub);
+ void (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@@ -122,33 +159,58 @@ struct amdgpu_gmc_funcs {
unsigned pasid);
/* enable/disable PRT support */
void (*set_prt)(struct amdgpu_device *adev, bool enable);
- /* map mtype to hardware flags */
- uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
/* get the pde for a given mc addr */
void (*get_vm_pde)(struct amdgpu_device *adev, int level,
u64 *dst, u64 *flags);
- /* get the pte flags to use for a BO VA mapping */
+ /* get the pte flags to use for PTEs */
void (*get_vm_pte)(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
- uint64_t *flags);
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *pte_flags);
+ /* override per-page pte flags */
+ void (*override_vm_pte_flags)(struct amdgpu_device *dev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags);
/* get the amount of memory used by the vbios for pre-OS console */
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
+ /* get the DCC buffer alignment */
+ unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev);
+
+ enum amdgpu_memory_partition (*query_mem_partition_mode)(
+ struct amdgpu_device *adev);
+ /* Request NPS mode */
+ int (*request_mem_partition_mode)(struct amdgpu_device *adev,
+ int nps_mode);
+ bool (*need_reset_on_init)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_mem_partition_info {
+ union {
+ struct {
+ uint32_t fpfn;
+ uint32_t lpfn;
+ } range;
+ struct {
+ int node;
+ } numa;
+ };
+ uint64_t size;
+};
+
+#define INVALID_PFN -1
+
+struct amdgpu_gmc_memrange {
+ uint64_t base_address;
+ uint64_t limit_address;
+ uint32_t flags;
+ int nid_mask;
};
-struct amdgpu_xgmi {
- /* from psp */
- u64 node_id;
- u64 hive_id;
- /* fixed per family */
- u64 node_segment_size;
- /* physical node (0-3) */
- unsigned physical_node_id;
- /* number of nodes (0-4) */
- unsigned num_physical_nodes;
- /* gpu list in the same hive */
- struct list_head head;
- bool supported;
- struct ras_common_if *ras_if;
+enum amdgpu_gart_placement {
+ AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
+ AMDGPU_GART_PLACEMENT_HIGH,
+ AMDGPU_GART_PLACEMENT_LOW,
};
struct amdgpu_gmc {
@@ -189,10 +251,13 @@ struct amdgpu_gmc {
u64 gart_end;
/* Frame buffer aperture of this GPU device. Different from
* fb_start (see below), this only covers the local GPU device.
- * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios)
- * and calculate vram_start of this local device by adding an
- * offset inside the XGMI hive.
- * Under VMID0, logical address == MC address
+ * If driver uses FB aperture to access FB, driver get fb_start from
+ * MC_VM_FB_LOCATION_BASE (set by vbios) and calculate vram_start
+ * of this local device by adding an offset inside the XGMI hive.
+ * If driver uses GART table for VMID0 FB access, driver finds a hole in
+ * VMID0's virtual address space to place the SYSVM aperture inside
+ * which the first part is vram and the second part is gart (covering
+ * system ram).
*/
u64 vram_start;
u64 vram_end;
@@ -234,24 +299,80 @@ struct amdgpu_gmc {
uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
bool tmz_enabled;
+ bool is_app_apu;
+ struct amdgpu_mem_partition_info *mem_partitions;
+ uint8_t num_mem_partitions;
const struct amdgpu_gmc_funcs *gmc_funcs;
+ enum amdgpu_memory_partition requested_nps_mode;
+ uint32_t supported_nps_modes;
+ uint32_t reset_flags;
struct amdgpu_xgmi xgmi;
struct amdgpu_irq_src ecc_irq;
int noretry;
+ uint32_t xnack_flags;
+
+ uint32_t vmid0_page_table_block_size;
+ uint32_t vmid0_page_table_depth;
+ struct amdgpu_bo *pdb0_bo;
+ /* CPU kmapped address of pdb0*/
+ void *ptr_pdb0;
+
+ /* MALL size */
+ u64 mall_size;
+ uint32_t m_half_use;
+
+ /* number of UMC instances */
+ int num_umc;
+ /* mode2 save restore */
+ u64 VM_L2_CNTL;
+ u64 VM_L2_CNTL2;
+ u64 VM_DUMMY_PAGE_FAULT_CNTL;
+ u64 VM_DUMMY_PAGE_FAULT_ADDR_LO32;
+ u64 VM_DUMMY_PAGE_FAULT_ADDR_HI32;
+ u64 VM_L2_PROTECTION_FAULT_CNTL;
+ u64 VM_L2_PROTECTION_FAULT_CNTL2;
+ u64 VM_L2_PROTECTION_FAULT_MM_CNTL3;
+ u64 VM_L2_PROTECTION_FAULT_MM_CNTL4;
+ u64 VM_L2_PROTECTION_FAULT_ADDR_LO32;
+ u64 VM_L2_PROTECTION_FAULT_ADDR_HI32;
+ u64 VM_DEBUG;
+ u64 VM_L2_MM_GROUP_RT_CLASSES;
+ u64 VM_L2_BANK_SELECT_RESERVED_CID;
+ u64 VM_L2_BANK_SELECT_RESERVED_CID2;
+ u64 VM_L2_CACHE_PARITY_CNTL;
+ u64 VM_L2_IH_LOG_CNTL;
+ u64 VM_CONTEXT_CNTL[16];
+ u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16];
+ u64 MC_VM_MX_L1_TLB_CNTL;
+
+ u64 noretry_flags;
+
+ bool flush_tlb_needs_extra_type_0;
+ bool flush_tlb_needs_extra_type_2;
+ bool flush_pasid_uses_kiq;
};
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
- ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
- ((adev), (pasid), (type), (allhub)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
-#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
+#define amdgpu_gmc_get_vm_pte(adev, vm, bo, vm_flags, pte_flags) \
+ ((adev)->gmc.gmc_funcs->get_vm_pte((adev), (vm), (bo), (vm_flags), \
+ (pte_flags)))
+#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
+ (adev)->gmc.gmc_funcs->override_vm_pte_flags \
+ ((adev), (vm), (addr), (pte_flags))
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+#define amdgpu_gmc_get_dcc_alignment(adev) ({ \
+ typeof(adev) _adev = (adev); \
+ _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \
+})
/**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
@@ -281,6 +402,8 @@ static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
return addr;
}
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev);
+int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev);
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags);
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
@@ -288,17 +411,34 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
uint64_t flags);
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
+void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc);
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base);
void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
- struct amdgpu_gmc *mc);
+ struct amdgpu_gmc *mc,
+ enum amdgpu_gart_placement gart_placement);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
-bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
uint16_t pasid, uint64_t timestamp);
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid);
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type);
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
+void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_inst);
extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
@@ -309,4 +449,28 @@ amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev);
+void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev);
+uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
+uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
+
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges,
+ uint8_t *exp_ranges);
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode);
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev);
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev);
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev);
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes);
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev);
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev);
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8980329cded0..895c1e4c6747 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -22,18 +22,16 @@
* Authors: Christian König
*/
+#include <drm/ttm/ttm_range_manager.h>
+
#include "amdgpu.h"
-static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man)
+static inline struct amdgpu_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_gtt_mgr, manager);
}
-struct amdgpu_gtt_node {
- struct drm_mm_node node;
- struct ttm_buffer_object *tbo;
-};
-
/**
* DOC: mem_info_gtt_total
*
@@ -43,14 +41,15 @@ struct amdgpu_gtt_node {
* the GTT block, in bytes
*/
static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ struct ttm_resource_manager *man;
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- man->size * PAGE_SIZE);
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ return sysfs_emit(buf, "%llu\n", man->size);
}
/**
@@ -62,14 +61,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
* size of the GTT block, in bytes
*/
static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ struct ttm_resource_manager *man = &adev->mman.gtt_mgr.manager;
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- amdgpu_gtt_mgr_usage(man));
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
}
static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
@@ -77,90 +76,28 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
amdgpu_mem_info_gtt_used_show, NULL);
-static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func;
-/**
- * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
- *
- * @adev: amdgpu_device pointer
- * @gtt_size: maximum size of GTT
- *
- * Allocate and initialize the GTT manager.
- */
-int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
-{
- struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
- uint64_t start, size;
- int ret;
-
- man->use_tt = true;
- man->func = &amdgpu_gtt_mgr_func;
-
- ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
-
- start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
- size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
- drm_mm_init(&mgr->mm, start, size);
- spin_lock_init(&mgr->lock);
- atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
-
- ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
- return ret;
- }
-
- ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
- ttm_resource_manager_set_used(man, true);
- return 0;
-}
-
-/**
- * amdgpu_gtt_mgr_fini - free and destroy GTT manager
- *
- * @adev: amdgpu_device pointer
- *
- * Destroy and free the GTT manager, returns -EBUSY if ranges are still
- * allocated inside it.
- */
-void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
-{
- struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
- int ret;
-
- ttm_resource_manager_set_used(man, false);
-
- ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
- if (ret)
- return;
-
- spin_lock(&mgr->lock);
- drm_mm_takedown(&mgr->mm);
- spin_unlock(&mgr->lock);
-
- device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
- device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
+static struct attribute *amdgpu_gtt_mgr_attributes[] = {
+ &dev_attr_mem_info_gtt_total.attr,
+ &dev_attr_mem_info_gtt_used.attr,
+ NULL
+};
- ttm_resource_manager_cleanup(man);
- ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
-}
+const struct attribute_group amdgpu_gtt_mgr_attr_group = {
+ .attrs = amdgpu_gtt_mgr_attributes
+};
/**
* amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
*
- * @mem: the mem object to check
+ * @res: the mem object to check
*
* Check if a mem object has already address space allocated.
*/
-bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem)
+bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
{
- return mem->mm_node != NULL;
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+
+ return drm_mm_node_allocated(&node->mm_nodes[0]);
}
/**
@@ -169,62 +106,54 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem)
* @man: TTM memory type manager
* @tbo: TTM BO we need this range for
* @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @res: the resulting mem object
*
* Dummy, allocate the node but no space for it yet.
*/
static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
struct ttm_buffer_object *tbo,
const struct ttm_place *place,
- struct ttm_resource *mem)
+ struct ttm_resource **res)
{
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
- struct amdgpu_gtt_node *node;
+ uint32_t num_pages = PFN_UP(tbo->base.size);
+ struct ttm_range_mgr_node *node;
int r;
- spin_lock(&mgr->lock);
- if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
- atomic64_read(&mgr->available) < mem->num_pages) {
- spin_unlock(&mgr->lock);
- return -ENOSPC;
- }
- atomic64_sub(mem->num_pages, &mgr->available);
- spin_unlock(&mgr->lock);
+ node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
- if (!place->lpfn) {
- mem->mm_node = NULL;
- mem->start = AMDGPU_BO_INVALID_OFFSET;
- return 0;
+ ttm_resource_init(tbo, place, &node->base);
+ if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+ ttm_resource_manager_usage(man) > man->size) {
+ r = -ENOSPC;
+ goto err_free;
}
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node) {
- r = -ENOMEM;
- goto err_out;
+ if (place->lpfn) {
+ spin_lock(&mgr->lock);
+ r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0],
+ num_pages, tbo->page_alignment,
+ 0, place->fpfn, place->lpfn,
+ DRM_MM_INSERT_BEST);
+ spin_unlock(&mgr->lock);
+ if (unlikely(r))
+ goto err_free;
+
+ node->base.start = node->mm_nodes[0].start;
+ } else {
+ node->mm_nodes[0].start = 0;
+ node->mm_nodes[0].size = PFN_UP(node->base.size);
+ node->base.start = AMDGPU_BO_INVALID_OFFSET;
}
- node->tbo = tbo;
-
- spin_lock(&mgr->lock);
- r = drm_mm_insert_node_in_range(&mgr->mm, &node->node, mem->num_pages,
- mem->page_alignment, 0, place->fpfn,
- place->lpfn, DRM_MM_INSERT_BEST);
- spin_unlock(&mgr->lock);
-
- if (unlikely(r))
- goto err_free;
-
- mem->mm_node = node;
- mem->start = node->node.start;
-
+ *res = &node->base;
return 0;
err_free:
+ ttm_resource_fini(man, &node->base);
kfree(node);
-
-err_out:
- atomic64_add(mem->num_pages, &mgr->available);
-
return r;
}
@@ -232,58 +161,81 @@ err_out:
* amdgpu_gtt_mgr_del - free ranges
*
* @man: TTM memory type manager
- * @mem: TTM memory object
+ * @res: TTM memory object
*
* Free the allocated GTT again.
*/
static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
- struct ttm_resource *mem)
+ struct ttm_resource *res)
{
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
- struct amdgpu_gtt_node *node = mem->mm_node;
- if (node) {
- spin_lock(&mgr->lock);
- drm_mm_remove_node(&node->node);
- spin_unlock(&mgr->lock);
- kfree(node);
- }
+ spin_lock(&mgr->lock);
+ if (drm_mm_node_allocated(&node->mm_nodes[0]))
+ drm_mm_remove_node(&node->mm_nodes[0]);
+ spin_unlock(&mgr->lock);
- atomic64_add(mem->num_pages, &mgr->available);
+ ttm_resource_fini(man, res);
+ kfree(node);
}
/**
- * amdgpu_gtt_mgr_usage - return usage of GTT domain
+ * amdgpu_gtt_mgr_recover - re-init gart
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_gtt_mgr pointer
*
- * Return how many bytes are used in the GTT domain
+ * Re-init the gart for each known BO in the GTT.
*/
-uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
+void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
{
- struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
- s64 result = man->size - atomic64_read(&mgr->available);
-
- return (result > 0 ? result : 0) * PAGE_SIZE;
-}
-
-int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
-{
- struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
- struct amdgpu_gtt_node *node;
+ struct ttm_range_mgr_node *node;
struct drm_mm_node *mm_node;
- int r = 0;
+ struct amdgpu_device *adev;
+ adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
spin_lock(&mgr->lock);
drm_mm_for_each_node(mm_node, &mgr->mm) {
- node = container_of(mm_node, struct amdgpu_gtt_node, node);
- r = amdgpu_ttm_recover_gart(node->tbo);
- if (r)
- break;
+ node = container_of(mm_node, typeof(*node), mm_nodes[0]);
+ amdgpu_ttm_recover_gart(node->base.bo);
}
spin_unlock(&mgr->lock);
+}
- return r;
+/**
+ * amdgpu_gtt_mgr_intersects - test for intersection
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified intersection test, only interesting if we need GART or not.
+ */
+static bool amdgpu_gtt_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
+/**
+ * amdgpu_gtt_mgr_compatible - test for compatibility
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified compatibility test.
+ */
+static bool amdgpu_gtt_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
}
/**
@@ -302,14 +254,70 @@ static void amdgpu_gtt_mgr_debug(struct ttm_resource_manager *man,
spin_lock(&mgr->lock);
drm_mm_print(&mgr->mm, printer);
spin_unlock(&mgr->lock);
-
- drm_printf(printer, "man size:%llu pages, gtt available:%lld pages, usage:%lluMB\n",
- man->size, (u64)atomic64_read(&mgr->available),
- amdgpu_gtt_mgr_usage(man) >> 20);
}
static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
.alloc = amdgpu_gtt_mgr_new,
.free = amdgpu_gtt_mgr_del,
+ .intersects = amdgpu_gtt_mgr_intersects,
+ .compatible = amdgpu_gtt_mgr_compatible,
.debug = amdgpu_gtt_mgr_debug
};
+
+/**
+ * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ * @gtt_size: maximum size of GTT
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
+{
+ struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ uint64_t start, size;
+
+ man->use_tt = true;
+ man->func = &amdgpu_gtt_mgr_func;
+
+ ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
+
+ start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+ start += amdgpu_vce_required_gart_pages(adev);
+ size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
+ drm_mm_init(&mgr->mm, start, size);
+ spin_lock_init(&mgr->lock);
+
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_gtt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ spin_lock(&mgr->lock);
+ drm_mm_takedown(&mgr->mm);
+ spin_unlock(&mgr->lock);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
new file mode 100644
index 000000000000..5a60d69a3e1f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_hdp_ras *ras;
+
+ if (!adev->hdp.ras)
+ return 0;
+
+ ras = adev->hdp.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register hdp ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "hdp");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__HDP;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->hdp.ras_if = &ras->ras_block.ras_comm;
+
+ /* hdp ras follows amdgpu_ras_block_late_init_default for late init */
+ return 0;
+}
+
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ } else {
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ }
+}
+
+void amdgpu_hdp_invalidate(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->asic_funcs && adev->asic_funcs->invalidate_hdp)
+ adev->asic_funcs->invalidate_hdp(adev, ring);
+ else if (adev->hdp.funcs && adev->hdp.funcs->invalidate_hdp)
+ adev->hdp.funcs->invalidate_hdp(adev, ring);
+}
+
+void amdgpu_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->asic_funcs && adev->asic_funcs->flush_hdp)
+ adev->asic_funcs->flush_hdp(adev, ring);
+ else if (adev->hdp.funcs && adev->hdp.funcs->flush_hdp)
+ adev->hdp.funcs->flush_hdp(adev, ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 43caf9f8cc11..d9f488fa76b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -22,19 +22,32 @@
*/
#ifndef __AMDGPU_HDP_H__
#define __AMDGPU_HDP_H__
+#include "amdgpu_ras.h"
+
+struct amdgpu_hdp_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
struct amdgpu_hdp_funcs {
void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
- void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
void (*init_registers)(struct amdgpu_device *adev);
};
struct amdgpu_hdp {
+ struct ras_common_if *ras_if;
const struct amdgpu_hdp_funcs *funcs;
+ struct amdgpu_hdp_ras *ras;
};
+int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_hdp_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_hdp_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
new file mode 100644
index 000000000000..90d26d820bac
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+/**
+ * DOC: MMU Notifier
+ *
+ * For coherent userptr handling registers an MMU notifier to inform the driver
+ * about updates on the page tables of a process.
+ *
+ * When somebody tries to invalidate the page tables we block the update until
+ * all operations on the pages in question are completed, then those pages are
+ * marked as accessed and also dirty if it wasn't a read only access.
+ *
+ * New command submissions using the userptrs in question are delayed until all
+ * page table invalidation are completed and we once more see a coherent process
+ * address space.
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_hmm.h"
+
+#define MAX_WALK_BYTE (2UL << 30)
+
+/**
+ * amdgpu_hmm_invalidate_gfx - callback to notify about mm change
+ *
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
+ *
+ * Block for operations on BOs to finish and mark pages as accessed and
+ * potentially dirty.
+ */
+static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ long r;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ mutex_lock(&adev->notifier_lock);
+
+ mmu_interval_set_seq(mni, cur_seq);
+
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops amdgpu_hmm_gfx_ops = {
+ .invalidate = amdgpu_hmm_invalidate_gfx,
+};
+
+/**
+ * amdgpu_hmm_invalidate_hsa - callback to notify about mm change
+ *
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
+ *
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
+ */
+static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = {
+ .invalidate = amdgpu_hmm_invalidate_hsa,
+};
+
+/**
+ * amdgpu_hmm_register - register a BO for notifier updates
+ *
+ * @bo: amdgpu buffer object
+ * @addr: userptr addr we should monitor
+ *
+ * Registers a mmu_notifier for the given BO at the specified address.
+ * Returns 0 on success, -ERRNO if anything goes wrong.
+ */
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
+{
+ int r;
+
+ if (bo->kfd_bo)
+ r = mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_hmm_hsa_ops);
+ else
+ r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_hmm_gfx_ops);
+ if (r)
+ /*
+ * Make sure amdgpu_hmm_unregister() doesn't call
+ * mmu_interval_notifier_remove() when the notifier isn't properly
+ * initialized.
+ */
+ bo->notifier.mm = NULL;
+
+ return r;
+}
+
+/**
+ * amdgpu_hmm_unregister - unregister a BO for notifier updates
+ *
+ * @bo: amdgpu buffer object
+ *
+ * Remove any registration of mmu notifier updates from the buffer object.
+ */
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
+{
+ if (!bo->notifier.mm)
+ return;
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
+}
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ uint64_t start, uint64_t npages, bool readonly,
+ void *owner,
+ struct amdgpu_hmm_range *range)
+{
+ unsigned long end;
+ unsigned long timeout;
+ unsigned long *pfns;
+ int r = 0;
+ struct hmm_range *hmm_range = &range->hmm_range;
+
+ pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns)) {
+ r = -ENOMEM;
+ goto out_free_range;
+ }
+
+ hmm_range->notifier = notifier;
+ hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+ if (!readonly)
+ hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+ hmm_range->hmm_pfns = pfns;
+ hmm_range->start = start;
+ end = start + npages * PAGE_SIZE;
+ hmm_range->dev_private_owner = owner;
+
+ do {
+ hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+
+ pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
+ hmm_range->start, hmm_range->end);
+
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+
+retry:
+ hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+ r = hmm_range_fault(hmm_range);
+ if (unlikely(r)) {
+ if (r == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_free_pfns;
+ }
+
+ if (hmm_range->end == end)
+ break;
+ hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
+ hmm_range->start = hmm_range->end;
+ } while (hmm_range->end < end);
+
+ hmm_range->start = start;
+ hmm_range->hmm_pfns = pfns;
+
+ return 0;
+
+out_free_pfns:
+ kvfree(pfns);
+ hmm_range->hmm_pfns = NULL;
+out_free_range:
+ if (r == -EBUSY)
+ r = -EAGAIN;
+ return r;
+}
+
+/**
+ * amdgpu_hmm_range_valid - check if an HMM range is still valid
+ * @range: pointer to the &struct amdgpu_hmm_range to validate
+ *
+ * Determines whether the given HMM range @range is still valid by
+ * checking for invalidations via the MMU notifier sequence. This is
+ * typically used to verify that the range has not been invalidated
+ * by concurrent address space updates before it is accessed.
+ *
+ * Return:
+ * * true if @range is valid and can be used safely
+ * * false if @range is NULL or has been invalidated
+ */
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
+{
+ if (!range)
+ return false;
+
+ return !mmu_interval_read_retry(range->hmm_range.notifier,
+ range->hmm_range.notifier_seq);
+}
+
+/**
+ * amdgpu_hmm_range_alloc - allocate and initialize an AMDGPU HMM range
+ * @bo: optional buffer object to associate with this HMM range
+ *
+ * Allocates memory for amdgpu_hmm_range and associates it with the @bo passed.
+ * The reference count of the @bo is incremented.
+ *
+ * Return:
+ * Pointer to a newly allocated struct amdgpu_hmm_range on success,
+ * or NULL if memory allocation fails.
+ */
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ struct amdgpu_hmm_range *range;
+
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (!range)
+ return NULL;
+
+ range->bo = amdgpu_bo_ref(bo);
+ return range;
+}
+
+/**
+ * amdgpu_hmm_range_free - release an AMDGPU HMM range
+ * @range: pointer to the range object to free
+ *
+ * Releases all resources held by @range, including the associated
+ * hmm_pfns and the dropping reference of associated bo if any.
+ *
+ * Return: void
+ */
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range)
+{
+ if (!range)
+ return;
+
+ kvfree(range->hmm_range.hmm_pfns);
+ amdgpu_bo_unref(&range->bo);
+ kfree(range);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
new file mode 100644
index 000000000000..140bc9cd57b4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_MN_H__
+#define __AMDGPU_MN_H__
+
+#include <linux/types.h>
+#include <linux/hmm.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+#include <linux/interval_tree.h>
+#include <linux/mmu_notifier.h>
+
+struct amdgpu_hmm_range {
+ struct hmm_range hmm_range;
+ struct amdgpu_bo *bo;
+};
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ uint64_t start, uint64_t npages, bool readonly,
+ void *owner,
+ struct amdgpu_hmm_range *range);
+
+#if defined(CONFIG_HMM_MIRROR)
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range);
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo);
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range);
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
+#else
+static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
+{
+ DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
+ "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
+ return -ENODEV;
+}
+
+static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {}
+
+static inline bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
+{
+ return false;
+}
+
+static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ return NULL;
+}
+
+static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index bca4dddd5a15..9cb72f0c5277 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -24,7 +24,6 @@
* Alex Deucher
*/
-#include <linux/export.h>
#include <linux/pci.h>
#include <drm/drm_edid.h>
@@ -175,7 +174,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
i2c->rec = *rec;
i2c->adapter.owner = THIS_MODULE;
- i2c->adapter.class = I2C_CLASS_DDC;
i2c->adapter.dev.parent = dev->dev;
i2c->dev = dev;
i2c_set_adapdata(&i2c->adapter, i2c);
@@ -186,7 +184,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
"AMDGPU i2c hw bus %s", name);
i2c->adapter.algo = &amdgpu_atombios_i2c_algo;
- ret = i2c_add_adapter(&i2c->adapter);
+ ret = devm_i2c_add_adapter(dev->dev, &i2c->adapter);
if (ret)
goto out_free;
} else {
@@ -217,22 +215,23 @@ out_free:
}
-void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c)
-{
- if (!i2c)
- return;
- WARN_ON(i2c->has_aux);
- i2c_del_adapter(&i2c->adapter);
- kfree(i2c);
-}
-
-/* Add the default buses */
void amdgpu_i2c_init(struct amdgpu_device *adev)
{
- if (amdgpu_hw_i2c)
- DRM_INFO("hw_i2c forced on, you may experience display detection problems!\n");
-
- amdgpu_atombios_i2c_init(adev);
+ if (!adev->is_atom_fw) {
+ if (!amdgpu_device_has_dc_support(adev)) {
+ amdgpu_atombios_i2c_init(adev);
+ } else {
+ switch (adev->asic_type) {
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ amdgpu_atombios_oem_i2c_init(adev, 0x97);
+ break;
+ default:
+ break;
+ }
+ }
+ }
}
/* remove all the buses */
@@ -240,28 +239,9 @@ void amdgpu_i2c_fini(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
- if (adev->i2c_bus[i]) {
- amdgpu_i2c_destroy(adev->i2c_bus[i]);
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++)
+ if (adev->i2c_bus[i])
adev->i2c_bus[i] = NULL;
- }
- }
-}
-
-/* Add additional buses */
-void amdgpu_i2c_add(struct amdgpu_device *adev,
- const struct amdgpu_i2c_bus_rec *rec,
- const char *name)
-{
- struct drm_device *dev = adev_to_drm(adev);
- int i;
-
- for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
- if (!adev->i2c_bus[i]) {
- adev->i2c_bus[i] = amdgpu_i2c_create(dev, rec, name);
- return;
- }
- }
}
/* looks up bus based on id */
@@ -280,7 +260,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev,
return NULL;
}
-static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
u8 slave_addr,
u8 addr,
u8 *val)
@@ -305,16 +285,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
out_buf[0] = addr;
out_buf[1] = 0;
- if (i2c_transfer(&i2c_bus->adapter, msgs, 2) == 2) {
- *val = in_buf[0];
- DRM_DEBUG("val = 0x%02x\n", *val);
- } else {
- DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n",
- addr, *val);
+ if (i2c_transfer(&i2c_bus->adapter, msgs, 2) != 2) {
+ DRM_DEBUG("i2c 0x%02x read failed\n", addr);
+ return -EIO;
}
+
+ *val = in_buf[0];
+ DRM_DEBUG("val = 0x%02x\n", *val);
+
+ return 0;
}
-static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
u8 slave_addr,
u8 addr,
u8 val)
@@ -330,16 +312,19 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
out_buf[0] = addr;
out_buf[1] = val;
- if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1)
- DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n",
- addr, val);
+ if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) {
+ DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val);
+ return -EIO;
+ }
+
+ return 0;
}
/* ddc router switching */
void
amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connector)
{
- u8 val;
+ u8 val = 0;
if (!amdgpu_connector->router.ddc_valid)
return;
@@ -347,16 +332,18 @@ amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connecto
if (!amdgpu_connector->router_bus)
return;
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x3, &val);
+ 0x3, &val))
+ return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x1, &val);
+ 0x1, &val))
+ return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
val |= amdgpu_connector->router.ddc_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
@@ -376,16 +363,18 @@ amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector
if (!amdgpu_connector->router_bus)
return;
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x3, &val);
+ 0x3, &val))
+ return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x1, &val);
+ 0x1, &val))
+ return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
val |= amdgpu_connector->router.cd_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
index 63c2ff7499e1..1d3d3806e0dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
@@ -30,9 +30,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c);
void amdgpu_i2c_init(struct amdgpu_device *adev);
void amdgpu_i2c_fini(struct amdgpu_device *adev);
-void amdgpu_i2c_add(struct amdgpu_device *adev,
- const struct amdgpu_i2c_bus_rec *rec,
- const char *name);
struct amdgpu_i2c_chan *
amdgpu_i2c_lookup(struct amdgpu_device *adev,
const struct amdgpu_i2c_bus_rec *i2c_bus);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7645223ea0ef..586a58facca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -30,7 +30,6 @@
#include <linux/slab.h>
#include <drm/amdgpu_drm.h>
-#include <drm/drm_debugfs.h>
#include "amdgpu.h"
#include "atom.h"
@@ -63,20 +62,22 @@
* Returns 0 on success, error on failure.
*/
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size, enum amdgpu_ib_pool_type pool_type,
+ unsigned int size, enum amdgpu_ib_pool_type pool_type,
struct amdgpu_ib *ib)
{
int r;
if (size) {
r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
- &ib->sa_bo, size, 256);
+ &ib->sa_bo, size);
if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
return r;
}
ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo);
+ /* flush the cache before commit the IB */
+ ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
if (!vm)
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
@@ -88,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
/**
* amdgpu_ib_free - free an IB (Indirect Buffer)
*
- * @adev: amdgpu_device pointer
* @ib: IB object to free
* @f: the fence SA bo need wait on for the ib alloation
*
* Free an IB (all asics).
*/
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f)
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
{
- amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+ amdgpu_sa_bo_free(&ib->sa_bo, f);
}
/**
@@ -122,24 +121,26 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
* a CONST_IB), it will be put on the ring prior to the DE IB. Prior
* to SI there was just a DE IB.
*/
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
struct amdgpu_ib *ibs, struct amdgpu_job *job,
struct dma_fence **f)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp = NULL;
- bool skip_preamble, need_ctx_switch;
- unsigned patch_offset = ~0;
+ struct amdgpu_fence *af;
+ bool need_ctx_switch;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
- unsigned fence_flags = 0;
- bool secure;
-
- unsigned i;
- int r = 0;
+ unsigned int fence_flags = 0;
+ bool secure, init_shadow;
+ u64 shadow_va, csa_va, gds_va;
+ int vmid = AMDGPU_JOB_GET_VMID(job);
bool need_pipe_sync = false;
+ unsigned int cond_exec;
+ unsigned int i;
+ int r = 0;
if (num_ibs == 0)
return -EINVAL;
@@ -148,26 +149,48 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (job) {
vm = job->vm;
fence_ctx = job->base.s_fence ?
- job->base.s_fence->scheduled.context : 0;
+ job->base.s_fence->finished.context : 0;
+ shadow_va = job->shadow_va;
+ csa_va = job->csa_va;
+ gds_va = job->gds_va;
+ init_shadow = job->init_shadow;
+ af = job->hw_fence;
+ /* Save the context of the job for reset handling.
+ * The driver needs this so it can skip the ring
+ * contents for guilty contexts.
+ */
+ af->context = fence_ctx;
+ /* the vm fence is also part of the job's context */
+ job->hw_vm_fence->context = fence_ctx;
} else {
vm = NULL;
fence_ctx = 0;
+ shadow_va = 0;
+ csa_va = 0;
+ gds_va = 0;
+ init_shadow = false;
+ af = kzalloc(sizeof(*af), GFP_ATOMIC);
+ if (!af)
+ return -ENOMEM;
}
if (!ring->sched.ready) {
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
- return -EINVAL;
+ r = -EINVAL;
+ goto free_fence;
}
if (vm && !job->vmid) {
dev_err(adev->dev, "VM IB without ID\n");
- return -EINVAL;
+ r = -EINVAL;
+ goto free_fence;
}
if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
- (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) {
- dev_err(adev->dev, "secure submissions not supported on compute rings\n");
- return -EINVAL;
+ (!ring->funcs->secure_submission_supported)) {
+ dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
+ r = -EINVAL;
+ goto free_fence;
}
alloc_size = ring->funcs->emit_frame_size + num_ibs *
@@ -176,14 +199,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
r = amdgpu_ring_alloc(ring, alloc_size);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
- return r;
+ goto free_fence;
}
need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
- ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
- (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
- amdgpu_vm_need_pipeline_sync(ring, job))) {
+ ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
+ need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
need_pipe_sync = true;
if (tmp)
@@ -210,23 +233,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
}
- if (job && ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
+ amdgpu_ring_ib_begin(ring);
-#ifdef CONFIG_X86_64
- if (!(adev->flags & AMD_IS_APU))
-#endif
- {
- if (ring->funcs->emit_hdp_flush)
- amdgpu_ring_emit_hdp_flush(ring);
- else
- amdgpu_asic_flush_hdp(adev, ring);
- }
+ if (ring->funcs->emit_gfx_shadow)
+ amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+ init_shadow, vmid);
+
+ if (ring->funcs->init_cond_exec)
+ cond_exec = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
+
+ amdgpu_device_flush_hdp(adev, ring);
if (need_ctx_switch)
status |= AMDGPU_HAVE_CTX_SWITCH;
- skip_preamble = ring->current_ctx == fence_ctx;
if (job && ring->funcs->emit_cntxcntl) {
status |= job->preamble_status;
status |= job->preemption_status;
@@ -244,14 +265,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
- /* drop preamble IBs if we don't have a context switch */
- if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
- skip_preamble &&
- !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
- !amdgpu_mcbp &&
- !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
- continue;
-
if (job && ring->funcs->emit_frame_cntl) {
if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
amdgpu_ring_emit_frame_cntl(ring, false, secure);
@@ -267,10 +280,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (job && ring->funcs->emit_frame_cntl)
amdgpu_ring_emit_frame_cntl(ring, false, secure);
-#ifdef CONFIG_X86_64
- if (!(adev->flags & AMD_IS_APU))
-#endif
- amdgpu_asic_invalidate_hdp(adev, ring);
+ amdgpu_device_invalidate_hdp(adev, ring);
if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
@@ -281,31 +291,54 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
}
- r = amdgpu_fence_emit(ring, f, fence_flags);
+ if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
+ amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
+ amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
+ }
+
+ r = amdgpu_fence_emit(ring, af, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vmid)
- amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid);
+ amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
amdgpu_ring_undo(ring);
return r;
}
+ *f = &af->base;
+ /* get a ref for the job */
+ if (job)
+ dma_fence_get(*f);
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);
- if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ amdgpu_ring_patch_cond_exec(ring, cond_exec);
ring->current_ctx = fence_ctx;
- if (vm && ring->funcs->emit_switch_buffer)
+ if (job && ring->funcs->emit_switch_buffer)
amdgpu_ring_emit_switch_buffer(ring);
if (ring->funcs->emit_wave_limit &&
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
+ /* Save the wptr associated with this fence.
+ * This must be last for resets to work properly
+ * as we need to save the wptr associated with this
+ * fence so we know what rings contents to backup
+ * after we reset the queue.
+ */
+ amdgpu_fence_save_wptr(af);
+
+ amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
+
return 0;
+
+free_fence:
+ if (!job)
+ kfree(af);
+ return r;
}
/**
@@ -319,20 +352,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
*/
int amdgpu_ib_pool_init(struct amdgpu_device *adev)
{
- unsigned size;
int r, i;
if (adev->ib_pool_ready)
return 0;
for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
- if (i == AMDGPU_IB_POOL_DIRECT)
- size = PAGE_SIZE * 2;
- else
- size = AMDGPU_IB_POOL_SIZE;
-
r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
- size, AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_IB_POOL_SIZE, 256,
AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error;
@@ -381,7 +408,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
{
long tmo_gfx, tmo_mm;
int r, ret = 0;
- unsigned i;
+ unsigned int i;
tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
if (amdgpu_sriov_vf(adev)) {
@@ -398,7 +425,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
/* for CP & SDMA engines since they are scheduled together so
* need to make the timeout width enough to cover the time
* cost waiting for it coming back under RUNTIME only
- */
+ */
tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
} else if (adev->gmc.xgmi.hive_id) {
tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT;
@@ -414,6 +441,10 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
if (!ring->sched.ready || !ring->funcs->test_ib)
continue;
+ if (adev->enable_mes &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ continue;
+
/* MM engine need more time */
if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
@@ -453,36 +484,34 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
*/
#if defined(CONFIG_DEBUG_FS)
-static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data)
+static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
{
- struct drm_info_node *node = (struct drm_info_node *) m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = m->private;
- seq_printf(m, "--------------------- DELAYED --------------------- \n");
+ seq_puts(m, "--------------------- DELAYED ---------------------\n");
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
m);
- seq_printf(m, "-------------------- IMMEDIATE -------------------- \n");
+ seq_puts(m, "-------------------- IMMEDIATE --------------------\n");
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
m);
- seq_printf(m, "--------------------- DIRECT ---------------------- \n");
+ seq_puts(m, "--------------------- DIRECT ----------------------\n");
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m);
return 0;
}
-static const struct drm_info_list amdgpu_debugfs_sa_list[] = {
- {"amdgpu_sa_info", &amdgpu_debugfs_sa_info, 0, NULL},
-};
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_sa_info);
#endif
-int amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
+void amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
- return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list,
- ARRAY_SIZE(amdgpu_debugfs_sa_list));
-#else
- return 0;
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_sa_info", 0444, root, adev,
+ &amdgpu_debugfs_sa_info_fops);
+
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 94b069630db3..9cab36322c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -62,9 +62,8 @@ int amdgpu_pasid_alloc(unsigned int bits)
int pasid = -EINVAL;
for (bits = min(bits, 31U); bits > 0; bits--) {
- pasid = ida_simple_get(&amdgpu_pasid_ida,
- 1U << (bits - 1), 1U << bits,
- GFP_KERNEL);
+ pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
+ (1U << bits) - 1, GFP_KERNEL);
if (pasid != -ENOSPC)
break;
}
@@ -82,7 +81,7 @@ int amdgpu_pasid_alloc(unsigned int bits)
void amdgpu_pasid_free(u32 pasid)
{
trace_amdgpu_pasid_freed(pasid);
- ida_simple_remove(&amdgpu_pasid_ida, pasid);
+ ida_free(&amdgpu_pasid_ida, pasid);
}
static void amdgpu_pasid_free_cb(struct dma_fence *fence,
@@ -107,36 +106,19 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence,
void amdgpu_pasid_free_delayed(struct dma_resv *resv,
u32 pasid)
{
- struct dma_fence *fence, **fences;
struct amdgpu_pasid_cb *cb;
- unsigned count;
+ struct dma_fence *fence;
int r;
- r = dma_resv_get_fences_rcu(resv, NULL, &count, &fences);
+ r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
if (r)
goto fallback;
- if (count == 0) {
+ if (!fence) {
amdgpu_pasid_free(pasid);
return;
}
- if (count == 1) {
- fence = fences[0];
- kfree(fences);
- } else {
- uint64_t context = dma_fence_context_alloc(1);
- struct dma_fence_array *array;
-
- array = dma_fence_array_create(count, fences, context,
- 1, false);
- if (!array) {
- kfree(fences);
- goto fallback;
- }
- fence = &array->base;
- }
-
cb = kmalloc(sizeof(*cb), GFP_KERNEL);
if (!cb) {
/* Last resort when we are OOM */
@@ -156,8 +138,8 @@ fallback:
/* Not enough memory for the delayed delete, as last resort
* block for all the fences to complete.
*/
- dma_resv_wait_timeout_rcu(resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
+ dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
amdgpu_pasid_free(pasid);
}
@@ -182,147 +164,145 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
atomic_read(&adev->gpu_reset_counter);
}
+/* Check if we need to switch to another set of resources */
+static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->gds_base != job->gds_base ||
+ id->gds_size != job->gds_size ||
+ id->gws_base != job->gws_base ||
+ id->gws_size != job->gws_size ||
+ id->oa_base != job->oa_base ||
+ id->oa_size != job->oa_size;
+}
+
+/* Check if the id is compatible with the job */
+static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->pd_gpu_addr == job->vm_pd_addr &&
+ !amdgpu_vmid_gds_switch_needed(id, job);
+}
+
/**
- * amdgpu_vm_grab_idle - grab idle VMID
+ * amdgpu_vmid_grab_idle - grab idle VMID
*
- * @vm: vm to allocate id for
* @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
* @idle: resulting idle VMID
+ * @fence: fence to wait for if no id could be grabbed
*
* Try to find an idle VMID, if none is idle add a fence to wait to the sync
* object. Returns -ENOMEM when we are out of memory.
*/
-static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
- struct amdgpu_ring *ring,
- struct amdgpu_sync *sync,
- struct amdgpu_vmid **idle)
+static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
+ struct amdgpu_vmid **idle,
+ struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- struct dma_fence **fences;
- unsigned i;
- int r;
-
- if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
- return amdgpu_sync_fence(sync, ring->vmid_wait);
- fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
- if (!fences)
- return -ENOMEM;
+ /* If anybody is waiting for a VMID let everybody wait for fairness */
+ if (!dma_fence_is_signaled(ring->vmid_wait)) {
+ *fence = dma_fence_get(ring->vmid_wait);
+ return 0;
+ }
/* Check if we have an idle VMID */
- i = 0;
- list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
- fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
- if (!fences[i])
- break;
- ++i;
+ list_for_each_entry_reverse((*idle), &id_mgr->ids_lru, list) {
+ /* Don't use per engine and per process VMID at the same time */
+ struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
+ NULL : ring;
+
+ *fence = amdgpu_sync_peek_fence(&(*idle)->active, r);
+ if (!(*fence))
+ return 0;
}
- /* If we can't find a idle VMID to use, wait till one becomes available */
- if (&(*idle)->list == &id_mgr->ids_lru) {
- u64 fence_context = adev->vm_manager.fence_context + ring->idx;
- unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
- struct dma_fence_array *array;
- unsigned j;
-
- *idle = NULL;
- for (j = 0; j < i; ++j)
- dma_fence_get(fences[j]);
-
- array = dma_fence_array_create(i, fences, fence_context,
- seqno, true);
- if (!array) {
- for (j = 0; j < i; ++j)
- dma_fence_put(fences[j]);
- kfree(fences);
- return -ENOMEM;
- }
-
- r = amdgpu_sync_fence(sync, &array->base);
- dma_fence_put(ring->vmid_wait);
- ring->vmid_wait = &array->base;
- return r;
- }
- kfree(fences);
+ /*
+ * If we can't find a idle VMID to use, wait on a fence from the least
+ * recently used in the hope that it will be available soon.
+ */
+ *idle = NULL;
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = dma_fence_get(*fence);
+ /* This is the reference we return */
+ dma_fence_get(*fence);
return 0;
}
/**
- * amdgpu_vm_grab_reserved - try to assign reserved VMID
+ * amdgpu_vmid_grab_reserved - try to assign reserved VMID
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
* @job: job who wants to use the VMID
* @id: resulting VMID
+ * @fence: fence to wait for if no id could be grabbed
*
* Try to assign a reserved VMID.
*/
static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
- struct amdgpu_sync *sync,
- struct dma_fence *fence,
struct amdgpu_job *job,
- struct amdgpu_vmid **id)
+ struct amdgpu_vmid **id,
+ struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
uint64_t fence_context = adev->fence_context + ring->idx;
- struct dma_fence *updates = sync->last_vm_update;
bool needs_flush = vm->use_cpu_for_update;
- int r = 0;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
+ int r;
*id = vm->reserved_vmid[vmhub];
- if (updates && (*id)->flushed_updates &&
- updates->context == (*id)->flushed_updates->context &&
- !dma_fence_is_later(updates, (*id)->flushed_updates))
- updates = NULL;
-
if ((*id)->owner != vm->immediate.fence_context ||
- job->vm_pd_addr != (*id)->pd_gpu_addr ||
- updates || !(*id)->last_flush ||
+ !amdgpu_vmid_compatible(*id, job) ||
+ (*id)->flushed_updates < updates ||
+ !(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
- !dma_fence_is_signaled((*id)->last_flush))) {
+ !dma_fence_is_signaled((*id)->last_flush)))
+ needs_flush = true;
+
+ if ((*id)->owner != vm->immediate.fence_context ||
+ (!adev->vm_manager.concurrent_flush && needs_flush)) {
struct dma_fence *tmp;
+ /* Don't use per engine and per process VMID at the
+ * same time
+ */
+ if (adev->vm_manager.concurrent_flush)
+ ring = NULL;
+
/* to prevent one context starved by another context */
(*id)->pd_gpu_addr = 0;
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
- r = amdgpu_sync_fence(sync, tmp);
- return r;
+ *fence = dma_fence_get(tmp);
+ return 0;
}
- needs_flush = true;
}
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, fence);
+ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
+ GFP_ATOMIC);
if (r)
return r;
- if (updates) {
- dma_fence_put((*id)->flushed_updates);
- (*id)->flushed_updates = dma_fence_get(updates);
- }
job->vm_needs_flush = needs_flush;
+ job->spm_update_needed = true;
return 0;
}
/**
- * amdgpu_vm_grab_used - try to reuse a VMID
+ * amdgpu_vmid_grab_used - try to reuse a VMID
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
* @job: job who wants to use the VMID
* @id: resulting VMID
*
@@ -330,16 +310,14 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
*/
static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
- struct amdgpu_sync *sync,
- struct dma_fence *fence,
struct amdgpu_job *job,
struct amdgpu_vmid **id)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
- struct dma_fence *updates = sync->last_vm_update;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
int r;
job->vm_needs_flush = vm->use_cpu_for_update;
@@ -347,13 +325,12 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
/* Check if we can use a VMID already assigned to this VM */
list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
bool needs_flush = vm->use_cpu_for_update;
- struct dma_fence *flushed;
/* Check all the prerequisites to using this VMID */
if ((*id)->owner != vm->immediate.fence_context)
continue;
- if ((*id)->pd_gpu_addr != job->vm_pd_addr)
+ if (!amdgpu_vmid_compatible(*id, job))
continue;
if (!(*id)->last_flush ||
@@ -361,30 +338,21 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
!dma_fence_is_signaled((*id)->last_flush)))
needs_flush = true;
- flushed = (*id)->flushed_updates;
- if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
+ if ((*id)->flushed_updates < updates)
needs_flush = true;
- /* Concurrent flushes are only possible starting with Vega10 and
- * are broken on Navi10 and Navi14.
- */
- if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
- adev->asic_type == CHIP_NAVI10 ||
- adev->asic_type == CHIP_NAVI14))
+ if (needs_flush && !adev->vm_manager.concurrent_flush)
continue;
/* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, fence);
+ r = amdgpu_sync_fence(&(*id)->active,
+ &job->base.s_fence->finished,
+ GFP_ATOMIC);
if (r)
return r;
- if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
- dma_fence_put((*id)->flushed_updates);
- (*id)->flushed_updates = dma_fence_get(updates);
- }
-
job->vm_needs_flush |= needs_flush;
return 0;
}
@@ -394,69 +362,74 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
}
/**
- * amdgpu_vm_grab_id - allocate the next free VMID
+ * amdgpu_vmid_grab - allocate the next free VMID
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
* @job: job who wants to use the VMID
+ * @fence: fence to wait for if no id could be grabbed
*
* Allocate an id for the vm, adding fences to the sync obj as necessary.
*/
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct dma_fence *fence,
- struct amdgpu_job *job)
+ struct amdgpu_job *job, struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *idle = NULL;
struct amdgpu_vmid *id = NULL;
int r = 0;
mutex_lock(&id_mgr->lock);
- r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
+ r = amdgpu_vmid_grab_idle(ring, &idle, fence);
if (r || !idle)
goto error;
- if (vm->reserved_vmid[vmhub]) {
- r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
+ if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
+ r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
} else {
- r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
+ r = amdgpu_vmid_grab_used(vm, ring, job, &id);
if (r)
goto error;
if (!id) {
- struct dma_fence *updates = sync->last_vm_update;
-
/* Still no ID to use? Then use the idle one found earlier */
id = idle;
/* Remember this submission as user of the VMID */
- r = amdgpu_sync_fence(&id->active, fence);
+ r = amdgpu_sync_fence(&id->active,
+ &job->base.s_fence->finished,
+ GFP_ATOMIC);
if (r)
goto error;
- dma_fence_put(id->flushed_updates);
- id->flushed_updates = dma_fence_get(updates);
job->vm_needs_flush = true;
}
list_move_tail(&id->list, &id_mgr->ids_lru);
}
- id->pd_gpu_addr = job->vm_pd_addr;
- id->owner = vm->immediate.fence_context;
-
+ job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
if (job->vm_needs_flush) {
+ id->flushed_updates = amdgpu_vm_tlb_seq(vm);
dma_fence_put(id->last_flush);
id->last_flush = NULL;
}
job->vmid = id - id_mgr->ids;
job->pasid = vm->pasid;
+
+ id->gds_base = job->gds_base;
+ id->gds_size = job->gds_size;
+ id->gws_base = job->gws_base;
+ id->gws_size = job->gws_size;
+ id->oa_base = job->oa_base;
+ id->oa_size = job->oa_size;
+ id->pd_gpu_addr = job->vm_pd_addr;
+ id->owner = vm->immediate.fence_context;
+
trace_amdgpu_vm_grab_id(vm, ring, job);
error:
@@ -464,29 +437,47 @@ error:
return r;
}
-int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+/*
+ * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
+ * @vm: the VM to check
+ * @vmhub: the VMHUB which will be used
+ *
+ * Returns: True if the VM will use a reserved VMID.
+ */
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
+{
+ return vm->reserved_vmid[vmhub];
+}
+
+/*
+ * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm
+ * @adev: amdgpu device structure
+ * @vm: the VM to reserve an ID for
+ * @vmhub: the VMHUB which should be used
+ *
+ * Mostly used to have a reserved VMID for debugging and SPM.
+ *
+ * Returns: 0 for success, -ENOENT if an ID is already reserved.
+ */
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub)
{
- struct amdgpu_vmid_mgr *id_mgr;
- struct amdgpu_vmid *idle;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id;
int r = 0;
- id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub])
goto unlock;
- if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
- AMDGPU_VM_MAX_RESERVED_VMID) {
- DRM_ERROR("Over limitation of reserved vmid\n");
- atomic_dec(&id_mgr->reserved_vmid_num);
- r = -EINVAL;
+ if (id_mgr->reserved_vmid) {
+ r = -ENOENT;
goto unlock;
}
- /* Select the first entry VMID */
- idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
- list_del_init(&idle->list);
- vm->reserved_vmid[vmhub] = idle;
+ /* Remove from normal round robin handling */
+ id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
+ list_del_init(&id->list);
+ vm->reserved_vmid[vmhub] = id;
+ id_mgr->reserved_vmid = true;
mutex_unlock(&id_mgr->lock);
return 0;
@@ -495,8 +486,13 @@ unlock:
return r;
}
-void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+/*
+ * amdgpu_vmid_free_reserved - free up a reserved VMID again
+ * @adev: amdgpu device structure
+ * @vm: the VM with the reserved ID
+ * @vmhub: the VMHUB which should be used
+ */
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
@@ -506,7 +502,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
list_add(&vm->reserved_vmid[vmhub]->list,
&id_mgr->ids_lru);
vm->reserved_vmid[vmhub] = NULL;
- atomic_dec(&id_mgr->reserved_vmid_num);
+ id_mgr->reserved_vmid = false;
}
mutex_unlock(&id_mgr->lock);
}
@@ -574,10 +570,17 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
- atomic_set(&id_mgr->reserved_vmid_num, 0);
- /* manage only VMIDs not used by KFD */
- id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0))
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ else if (AMDGPU_IS_MMHUB0(i) ||
+ AMDGPU_IS_MMHUB1(i))
+ id_mgr->num_ids = 16;
+ else
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
@@ -608,7 +611,6 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
struct amdgpu_vmid *id = &id_mgr->ids[j];
amdgpu_sync_free(&id->active);
- dma_fence_put(id->flushed_updates);
dma_fence_put(id->last_flush);
dma_fence_put(id->pasid_mapping);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 0c3b4fa1f936..b3649cd3af56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -47,7 +47,7 @@ struct amdgpu_vmid {
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
- struct dma_fence *flushed_updates;
+ uint64_t flushed_updates;
uint32_t current_gpu_reset_count;
@@ -67,7 +67,7 @@ struct amdgpu_vmid_mgr {
unsigned num_ids;
struct list_head ids_lru;
struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
- atomic_t reserved_vmid_num;
+ bool reserved_vmid;
};
int amdgpu_pasid_alloc(unsigned int bits);
@@ -77,15 +77,13 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
-int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub);
-void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub);
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct dma_fence *fence,
- struct amdgpu_job *job);
+ struct amdgpu_job *job, struct dma_fence **fence);
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
unsigned vmid);
void amdgpu_vmid_reset_all(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index dc852af4f3b7..a6419246e9c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_ih.h"
+#include "amdgpu_reset.h"
/**
* amdgpu_ih_ring_init - initialize the IH state
@@ -99,6 +100,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
ih->rptr_cpu = &adev->wb.wb[rptr_offs];
}
+
+ init_waitqueue_head(&ih->wait_process);
return 0;
}
@@ -113,9 +116,11 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
*/
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{
+
+ if (!ih->ring)
+ return;
+
if (ih->use_bus_addr) {
- if (!ih->ring)
- return;
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
@@ -134,6 +139,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
/**
* amdgpu_ih_ring_write - write IV to the ring buffer
*
+ * @adev: amdgpu_device pointer
* @ih: ih ring to write to
* @iv: the iv to write
* @num_dw: size of the iv in dw
@@ -141,8 +147,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
* Writes an IV to the ring buffer using the CPU and increment the wptr.
* Used for testing and delegating IVs to a software ring.
*/
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw)
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw)
{
uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
unsigned int i;
@@ -157,10 +163,41 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
if (wptr != READ_ONCE(ih->rptr)) {
wmb();
WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
+ } else if (adev->irq.retry_cam_enabled) {
+ dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
+ wptr, ih->rptr);
}
}
/**
+ * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ *
+ * Used to ensure ring has processed IVs up to the checkpoint write pointer.
+ */
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t checkpoint_wptr;
+ uint64_t checkpoint_ts;
+ long timeout = HZ;
+
+ if (!ih->enabled || adev->shutdown)
+ return -ENODEV;
+
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ /* Order wptr with ring data. */
+ rmb();
+ checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+
+ return wait_event_interruptible_timeout(ih->wait_process,
+ amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
+ ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
+}
+
+/**
* amdgpu_ih_process - interrupt handler
*
* @adev: amdgpu_device pointer
@@ -171,7 +208,7 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
*/
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{
- unsigned int count = AMDGPU_IH_MAX_NUM_IVS;
+ unsigned int count;
u32 wptr;
if (!ih->enabled || adev->shutdown)
@@ -180,11 +217,8 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
wptr = amdgpu_ih_get_wptr(adev, ih);
restart_ih:
- /* is somebody else already processing irqs? */
- if (atomic_xchg(&ih->lock, 1))
- return IRQ_NONE;
-
- DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
+ count = AMDGPU_IH_MAX_NUM_IVS;
+ dev_dbg(adev->dev, "%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
/* Order reading of wptr vs. reading of IH ring data */
rmb();
@@ -194,13 +228,23 @@ restart_ih:
ih->rptr &= ih->ptr_mask;
}
- amdgpu_ih_set_rptr(adev, ih);
- atomic_set(&ih->lock, 0);
+ if (!ih->overflow)
+ amdgpu_ih_set_rptr(adev, ih);
+
+ wake_up_all(&ih->wait_process);
/* make sure wptr hasn't changed while processing */
wptr = amdgpu_ih_get_wptr(adev, ih);
if (wptr != ih->rptr)
- goto restart_ih;
+ if (!ih->overflow)
+ goto restart_ih;
+
+ if (ih->overflow)
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
return IRQ_HANDLED;
}
@@ -213,7 +257,7 @@ restart_ih:
* @entry: IV entry
*
* Decodes the interrupt vector at the current rptr
- * position and also advance the position for for Vega10
+ * position and also advance the position for Vega10
* and later GPUs.
*/
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
@@ -241,7 +285,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
entry->timestamp_src = dw[2] >> 31;
entry->pasid = dw[3] & 0xffff;
- entry->pasid_src = dw[3] >> 31;
+ entry->node_id = (dw[3] >> 16) & 0xff;
entry->src_data[0] = dw[4];
entry->src_data[1] = dw[5];
entry->src_data[2] = dw[6];
@@ -250,3 +294,24 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
/* wptr/rptr are in bytes! */
ih->rptr += 32;
}
+
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset)
+{
+ uint32_t iv_size = 32;
+ uint32_t ring_index;
+ uint32_t dw1, dw2;
+
+ rptr += iv_size * offset;
+ ring_index = (rptr & ih->ptr_mask) >> 2;
+
+ dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
+ return dw1 | ((u64)(dw2 & 0xffff) << 32);
+}
+
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+ return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
+ ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 6ed4a85fc7c3..f58b6be7fccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -27,6 +27,9 @@
/* Maximum number of IVs processed at once */
#define AMDGPU_IH_MAX_NUM_IVS 32
+#define IH_RING_SIZE (256 * 1024)
+#define IH_SW_RING_SIZE (16 * 1024) /* enough for 512 CAM entries */
+
struct amdgpu_device;
struct amdgpu_iv_entry;
@@ -53,42 +56,63 @@ struct amdgpu_ih_ring {
bool use_bus_addr;
struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
+ uint32_t *ring;
uint64_t gpu_addr;
uint64_t wptr_addr;
- volatile uint32_t *wptr_cpu;
+ uint32_t *wptr_cpu;
uint64_t rptr_addr;
- volatile uint32_t *rptr_cpu;
+ uint32_t *rptr_cpu;
bool enabled;
unsigned rptr;
- atomic_t lock;
struct amdgpu_ih_regs ih_regs;
+
+ /* For waiting on IH processing at checkpoint. */
+ wait_queue_head_t wait_process;
+ uint64_t processed_timestamp;
+ bool overflow;
};
+/* return true if time stamp t2 is after t1 with 48bit wrap around */
+#define amdgpu_ih_ts_after(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL)
+
+#define amdgpu_ih_ts_after_or_equal(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) >= 0LL)
+
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry);
+ uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
};
#define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih))
#define amdgpu_ih_decode_iv(adev, iv) \
(adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
+#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \
+ (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \
+ (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset)))
#define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih))
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw);
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw);
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih);
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry);
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
new file mode 100644
index 000000000000..484e936812e4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IMU_H__
+#define __AMDGPU_IMU_H__
+
+enum imu_work_mode {
+ DEBUG_MODE,
+ MISSION_MODE
+};
+
+struct amdgpu_imu_funcs {
+ int (*init_microcode)(struct amdgpu_device *adev);
+ int (*load_microcode)(struct amdgpu_device *adev);
+ void (*setup_imu)(struct amdgpu_device *adev);
+ int (*start_imu)(struct amdgpu_device *adev);
+ void (*program_rlc_ram)(struct amdgpu_device *adev);
+ int (*wait_for_reset_status)(struct amdgpu_device *adev);
+};
+
+struct imu_rlc_ram_golden {
+ u32 hwip;
+ u32 instance;
+ u32 segment;
+ u32 reg;
+ u32 data;
+ u32 addr_mask;
+};
+
+#define IMU_RLC_RAM_GOLDEN_VALUE(ip, inst, reg, data, addr_mask) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, data, addr_mask }
+
+struct amdgpu_imu {
+ const struct amdgpu_imu_funcs *funcs;
+ enum imu_work_mode mode;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
index 5cf142e849bb..a1cbd7c3deb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
@@ -1,4 +1,4 @@
-/**
+/*
* \file amdgpu_ioc32.c
*
* 32-bit ioctl compatibility routines for the AMDGPU DRM.
@@ -37,12 +37,9 @@
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
unsigned int nr = DRM_IOCTL_NR(cmd);
- int ret;
if (nr < DRM_COMMAND_BASE)
return drm_compat_ioctl(filp, cmd, arg);
- ret = amdgpu_drm_ioctl(filp, cmd, arg);
-
- return ret;
+ return amdgpu_drm_ioctl(filp, cmd, arg);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
new file mode 100644
index 000000000000..99e1cf4fc955
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ip.h"
+
+static int8_t amdgpu_logical_to_dev_inst(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst)
+{
+ int8_t dev_inst;
+
+ switch (block) {
+ case GC_HWIP:
+ case SDMA0_HWIP:
+ /* Both JPEG and VCN as JPEG is only alias of VCN */
+ case VCN_HWIP:
+ dev_inst = adev->ip_map.dev_inst[block][inst];
+ break;
+ default:
+ /* For rest of the IPs, no look up required.
+ * Assume 'logical instance == physical instance' for all configs. */
+ dev_inst = inst;
+ break;
+ }
+
+ return dev_inst;
+}
+
+static uint32_t amdgpu_logical_to_dev_mask(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask)
+{
+ uint32_t dev_mask = 0;
+ int8_t log_inst, dev_inst;
+
+ while (mask) {
+ log_inst = ffs(mask) - 1;
+ dev_inst = amdgpu_logical_to_dev_inst(adev, block, log_inst);
+ dev_mask |= (1 << dev_inst);
+ mask &= ~(1 << log_inst);
+ }
+
+ return dev_mask;
+}
+
+static void amdgpu_populate_ip_map(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type ip_block,
+ uint32_t inst_mask)
+{
+ int l = 0, i;
+
+ while (inst_mask) {
+ i = ffs(inst_mask) - 1;
+ adev->ip_map.dev_inst[ip_block][l++] = i;
+ inst_mask &= ~(1 << i);
+ }
+ for (; l < HWIP_MAX_INSTANCE; l++)
+ adev->ip_map.dev_inst[ip_block][l] = -1;
+}
+
+void amdgpu_ip_map_init(struct amdgpu_device *adev)
+{
+ u32 ip_map[][2] = {
+ { GC_HWIP, adev->gfx.xcc_mask },
+ { SDMA0_HWIP, adev->sdma.sdma_mask },
+ { VCN_HWIP, adev->vcn.inst_mask },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
+ amdgpu_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
+
+ adev->ip_map.logical_to_dev_inst = amdgpu_logical_to_dev_inst;
+ adev->ip_map.logical_to_dev_mask = amdgpu_logical_to_dev_mask;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
new file mode 100644
index 000000000000..2490fd322aec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IP_H__
+#define __AMDGPU_IP_H__
+
+void amdgpu_ip_map_init(struct amdgpu_device *adev);
+
+#endif /* __AMDGPU_IP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index afbbec82a289..8112ffc85995 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -45,10 +45,9 @@
#include <linux/irq.h>
#include <linux/pci.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_irq.h>
#include <drm/drm_vblank.h>
#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "atom.h"
@@ -65,40 +64,55 @@
#define AMDGPU_WAIT_IDLE_TIMEOUT 200
-/**
- * amdgpu_hotplug_work_func - work handler for display hotplug event
- *
- * @work: work struct pointer
- *
- * This is the hotplug event work handler (all ASICs).
- * The work gets scheduled from the IRQ handler if there
- * was a hotplug interrupt. It walks through the connector table
- * and calls hotplug handler for each connector. After this, it sends
- * a DRM hotplug event to alert userspace.
- *
- * This design approach is required in order to defer hotplug event handling
- * from the IRQ handler to a work handler because hotplug handler has to use
- * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
- * sleep).
- */
-static void amdgpu_hotplug_work_func(struct work_struct *work)
-{
- struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
- hotplug_work);
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_mode_config *mode_config = &dev->mode_config;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
-
- mutex_lock(&mode_config->mutex);
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter)
- amdgpu_connector_hotplug(connector);
- drm_connector_list_iter_end(&iter);
- mutex_unlock(&mode_config->mutex);
- /* Just fire off a uevent and let userspace tell us what to do */
- drm_helper_hpd_irq_event(dev);
-}
+const char *soc15_ih_clientid_name[] = {
+ "IH",
+ "SDMA2 or ACP",
+ "ATHUB",
+ "BIF",
+ "SDMA3 or DCE",
+ "SDMA4 or ISP",
+ "VMC1 or PCIE0",
+ "RLC",
+ "SDMA0",
+ "SDMA1",
+ "SE0SH",
+ "SE1SH",
+ "SE2SH",
+ "SE3SH",
+ "VCN1 or UVD1",
+ "THM",
+ "VCN or UVD",
+ "SDMA5 or VCE0",
+ "VMC",
+ "SDMA6 or XDMA",
+ "GRBM_CP",
+ "ATS",
+ "ROM_SMUIO",
+ "DF",
+ "SDMA7 or VCE1",
+ "PWR",
+ "reserved",
+ "UTCL2",
+ "EA",
+ "UTCL2LOG",
+ "MP0",
+ "MP1"
+};
+
+const int node_id_to_phys_map[NODEID_MAX] = {
+ [AID0_NODEID] = 0,
+ [XCD0_NODEID] = 0,
+ [XCD1_NODEID] = 1,
+ [AID1_NODEID] = 1,
+ [XCD2_NODEID] = 2,
+ [XCD3_NODEID] = 3,
+ [AID2_NODEID] = 2,
+ [XCD4_NODEID] = 4,
+ [XCD5_NODEID] = 5,
+ [AID3_NODEID] = 3,
+ [XCD6_NODEID] = 6,
+ [XCD7_NODEID] = 7,
+};
/**
* amdgpu_irq_disable_all - disable *all* interrupts
@@ -110,7 +124,7 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
- unsigned i, j, k;
+ unsigned int i, j, k;
int r;
spin_lock_irqsave(&adev->irq.lock, irqflags);
@@ -125,12 +139,12 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
continue;
for (k = 0; k < src->num_types; ++k) {
- atomic_set(&src->enabled_types[k], 0);
r = src->funcs->set(adev, src, k,
AMDGPU_IRQ_STATE_DISABLE);
if (r)
- DRM_ERROR("error disabling interrupt (%d)\n",
- r);
+ dev_err(adev->dev,
+ "error disabling interrupt (%d)\n",
+ r);
}
}
}
@@ -148,7 +162,7 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
* Returns:
* result of handling the IRQ, as defined by &irqreturn_t
*/
-irqreturn_t amdgpu_irq_handler(int irq, void *arg)
+static irqreturn_t amdgpu_irq_handler(int irq, void *arg)
{
struct drm_device *dev = (struct drm_device *) arg;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -158,20 +172,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
- /* For the hardware that cannot enable bif ring for both ras_controller_irq
- * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
- * register to check whether the interrupt is triggered or not, and properly
- * ack the interrupt if it is there
- */
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
- if (adev->nbio.funcs &&
- adev->nbio.funcs->handle_ras_controller_intr_no_bifring)
- adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev);
-
- if (adev->nbio.funcs &&
- adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring)
- adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev);
- }
+ amdgpu_ras_interrupt_fatal_error_handler(adev);
return ret;
}
@@ -242,6 +243,21 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
return true;
}
+void amdgpu_restore_msix(struct amdgpu_device *adev)
+{
+ u16 ctrl;
+
+ pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
+ if (!(ctrl & PCI_MSIX_FLAGS_ENABLE))
+ return;
+
+ /* VF FLR */
+ ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+ pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+ ctrl |= PCI_MSIX_FLAGS_ENABLE;
+ pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+}
+
/**
* amdgpu_irq_init - initialize interrupt handling
*
@@ -255,66 +271,79 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
*/
int amdgpu_irq_init(struct amdgpu_device *adev)
{
- int r = 0;
+ unsigned int irq, flags;
+ int r;
spin_lock_init(&adev->irq.lock);
/* Enable MSI if not disabled by module parameter */
adev->irq.msi_enabled = false;
- if (amdgpu_msi_ok(adev)) {
- int nvec = pci_msix_vec_count(adev->pdev);
- unsigned int flags;
+ if (!amdgpu_msi_ok(adev))
+ flags = PCI_IRQ_INTX;
+ else
+ flags = PCI_IRQ_ALL_TYPES;
- if (nvec <= 0) {
- flags = PCI_IRQ_MSI;
- } else {
- flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
- }
- /* we only need one vector */
- nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
- if (nvec > 0) {
- adev->irq.msi_enabled = true;
- dev_dbg(adev->dev, "using MSI/MSI-X.\n");
- }
+ /* we only need one vector */
+ r = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
+ if (r < 0) {
+ dev_err(adev->dev, "Failed to alloc msi vectors\n");
+ return r;
}
- if (!amdgpu_device_has_dc_support(adev)) {
- if (!adev->enable_virtual_display)
- /* Disable vblank IRQs aggressively for power-saving */
- /* XXX: can this be enabled for DC? */
- adev_to_drm(adev)->vblank_disable_immediate = true;
-
- r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
- if (r)
- return r;
-
- /* Pre-DCE11 */
- INIT_WORK(&adev->hotplug_work,
- amdgpu_hotplug_work_func);
+ if (amdgpu_msi_ok(adev)) {
+ adev->irq.msi_enabled = true;
+ dev_dbg(adev->dev, "using MSI/MSI-X.\n");
}
INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);
INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft);
+ /* Use vector 0 for MSI-X. */
+ r = pci_irq_vector(adev->pdev, 0);
+ if (r < 0)
+ goto free_vectors;
+ irq = r;
+
+ /* PCI devices require shared interrupts. */
+ r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name,
+ adev_to_drm(adev));
+ if (r)
+ goto free_vectors;
+
adev->irq.installed = true;
- /* Use vector 0 for MSI-X */
- r = drm_irq_install(adev_to_drm(adev), pci_irq_vector(adev->pdev, 0));
- if (r) {
- adev->irq.installed = false;
- if (!amdgpu_device_has_dc_support(adev))
- flush_work(&adev->hotplug_work);
- return r;
- }
+ adev->irq.irq = irq;
adev_to_drm(adev)->max_vblank_count = 0x00ffffff;
- DRM_DEBUG("amdgpu: irq initialized.\n");
+ dev_dbg(adev->dev, "amdgpu: irq initialized.\n");
return 0;
+
+free_vectors:
+ if (adev->irq.msi_enabled)
+ pci_free_irq_vectors(adev->pdev);
+
+ adev->irq.msi_enabled = false;
+ return r;
+}
+
+void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
+{
+ if (adev->irq.installed) {
+ free_irq(adev->irq.irq, adev_to_drm(adev));
+ adev->irq.installed = false;
+ if (adev->irq.msi_enabled)
+ pci_free_irq_vectors(adev->pdev);
+ }
+
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
}
/**
- * amdgpu_irq_fini - shut down interrupt handling
+ * amdgpu_irq_fini_sw - shut down interrupt handling
*
* @adev: amdgpu device pointer
*
@@ -322,18 +351,9 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
* functionality, shuts down vblank, hotplug and reset interrupt handling,
* turns off interrupts from all sources (all ASICs).
*/
-void amdgpu_irq_fini(struct amdgpu_device *adev)
+void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
{
- unsigned i, j;
-
- if (adev->irq.installed) {
- drm_irq_uninstall(adev_to_drm(adev));
- adev->irq.installed = false;
- if (adev->irq.msi_enabled)
- pci_free_irq_vectors(adev->pdev);
- if (!amdgpu_device_has_dc_support(adev))
- flush_work(&adev->hotplug_work);
- }
+ unsigned int i, j;
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
@@ -347,11 +367,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
kfree(src->enabled_types);
src->enabled_types = NULL;
- if (src->data) {
- kfree(src->data);
- kfree(src);
- adev->irq.client[i].sources[j] = NULL;
- }
}
kfree(adev->irq.client[i].sources);
adev->irq.client[i].sources = NULL;
@@ -372,7 +387,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
* 0 on success or error code otherwise
*/
int amdgpu_irq_add_id(struct amdgpu_device *adev,
- unsigned client_id, unsigned src_id,
+ unsigned int client_id, unsigned int src_id,
struct amdgpu_irq_src *source)
{
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
@@ -424,13 +439,21 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
{
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
- unsigned client_id, src_id;
+ unsigned int client_id, src_id;
struct amdgpu_irq_src *src;
bool handled = false;
int r;
entry.ih = ih;
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
+
+ /*
+ * timestamp is not supported on some legacy SOCs (cik, cz, iceland,
+ * si and tonga), so initialize timestamp and timestamp_src to 0
+ */
+ entry.timestamp = 0;
+ entry.timestamp_src = 0;
+
amdgpu_ih_decode_iv(adev, &entry);
trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
@@ -439,33 +462,41 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
src_id = entry.src_id;
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
- DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
+ dev_dbg(adev->dev, "Invalid client_id in IV: %d\n", client_id);
} else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
- DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
+ dev_dbg(adev->dev, "Invalid src_id in IV: %d\n", src_id);
- } else if ((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) &&
+ } else if (((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) ||
+ (client_id == SOC15_IH_CLIENTID_ISP)) &&
adev->irq.virq[src_id]) {
- generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id));
+ generic_handle_domain_irq(adev->irq.domain, src_id);
} else if (!adev->irq.client[client_id].sources) {
- DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
- client_id, src_id);
+ dev_dbg(adev->dev,
+ "Unregistered interrupt client_id: %d src_id: %d\n",
+ client_id, src_id);
} else if ((src = adev->irq.client[client_id].sources[src_id])) {
r = src->funcs->process(adev, src, &entry);
if (r < 0)
- DRM_ERROR("error processing interrupt (%d)\n", r);
+ dev_err(adev->dev, "error processing interrupt (%d)\n",
+ r);
else if (r)
handled = true;
} else {
- DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
+ dev_dbg(adev->dev,
+ "Unregistered interrupt src_id: %d of client_id:%d\n",
+ src_id, client_id);
}
/* Send it to amdkfd as well if it isn't already handled */
if (!handled)
amdgpu_amdkfd_interrupt(adev, entry.iv_entry);
+
+ if (amdgpu_ih_ts_after(ih->processed_timestamp, entry.timestamp))
+ ih->processed_timestamp = entry.timestamp;
}
/**
@@ -482,7 +513,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry,
unsigned int num_dw)
{
- amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
+ amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
schedule_work(&adev->irq.ih_soft_work);
}
@@ -496,7 +527,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
* Updates interrupt state for the specific source (all ASICs).
*/
int amdgpu_irq_update(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src, unsigned type)
+ struct amdgpu_irq_src *src, unsigned int type)
{
unsigned long irqflags;
enum amdgpu_interrupt_state state;
@@ -505,7 +536,8 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
spin_lock_irqsave(&adev->irq.lock, irqflags);
/* We need to determine after taking the lock, otherwise
- we might disable just enabled interrupts again */
+ * we might disable just enabled interrupts again
+ */
if (amdgpu_irq_enabled(adev, src, type))
state = AMDGPU_IRQ_STATE_ENABLE;
else
@@ -528,6 +560,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
{
int i, j, k;
+ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+ amdgpu_restore_msix(adev);
+
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
@@ -535,7 +570,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
- if (!src)
+ if (!src || !src->funcs || !src->funcs->set)
continue;
for (k = 0; k < src->num_types; k++)
amdgpu_irq_update(adev, src, k);
@@ -556,9 +591,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
* 0 on success or error code otherwise
*/
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
- if (!adev_to_drm(adev)->irq_enabled)
+ if (!adev->irq.installed)
return -ENOENT;
if (type >= src->num_types)
@@ -586,9 +621,13 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* 0 on success or error code otherwise
*/
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
- if (!adev_to_drm(adev)->irq_enabled)
+ /* When the threshold is reached,the interrupt source may not be enabled.return -EINVAL */
+ if (amdgpu_ras_is_rma(adev) && !amdgpu_irq_enabled(adev, src, type))
+ return -EINVAL;
+
+ if (!adev->irq.installed)
return -ENOENT;
if (type >= src->num_types)
@@ -597,6 +636,9 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
if (!src->enabled_types || !src->funcs->set)
return -EINVAL;
+ if (WARN_ON(!amdgpu_irq_enabled(adev, src, type)))
+ return -EINVAL;
+
if (atomic_dec_and_test(&src->enabled_types[type]))
return amdgpu_irq_update(adev, src, type);
@@ -617,9 +659,9 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* invalid parameters
*/
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
- if (!adev_to_drm(adev)->irq_enabled)
+ if (!adev->irq.installed)
return false;
if (type >= src->num_types)
@@ -691,10 +733,10 @@ static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
*/
int amdgpu_irq_add_domain(struct amdgpu_device *adev)
{
- adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
- &amdgpu_hw_irqdomain_ops, adev);
+ adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
+ &amdgpu_hw_irqdomain_ops, adev);
if (!adev->irq.domain) {
- DRM_ERROR("GPU irq add domain failed\n");
+ dev_err(adev->dev, "GPU irq add domain failed\n");
return -ENODEV;
}
@@ -730,7 +772,7 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
* Returns:
* Linux IRQ
*/
-unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id)
+unsigned int amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned int src_id)
{
adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index ac527e5deae6..9f0417456abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -53,7 +53,7 @@ struct amdgpu_iv_entry {
uint64_t timestamp;
unsigned timestamp_src;
unsigned pasid;
- unsigned pasid_src;
+ unsigned node_id;
unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry;
};
@@ -62,7 +62,6 @@ struct amdgpu_irq_src {
unsigned num_types;
atomic_t *enabled_types;
const struct amdgpu_irq_src_funcs *funcs;
- void *data;
};
struct amdgpu_irq_client {
@@ -81,6 +80,7 @@ struct amdgpu_irq_src_funcs {
struct amdgpu_irq {
bool installed;
+ unsigned int irq;
spinlock_t lock;
/* interrupt sources */
struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX];
@@ -98,13 +98,33 @@ struct amdgpu_irq {
struct irq_domain *domain; /* GPU irq controller domain */
unsigned virq[AMDGPU_MAX_IRQ_SRC_ID];
uint32_t srbm_soft_reset;
+ u32 retry_cam_doorbell_index;
+ bool retry_cam_enabled;
};
+enum interrupt_node_id_per_aid {
+ AID0_NODEID = 0,
+ XCD0_NODEID = 1,
+ XCD1_NODEID = 2,
+ AID1_NODEID = 4,
+ XCD2_NODEID = 5,
+ XCD3_NODEID = 6,
+ AID2_NODEID = 8,
+ XCD4_NODEID = 9,
+ XCD5_NODEID = 10,
+ AID3_NODEID = 12,
+ XCD6_NODEID = 13,
+ XCD7_NODEID = 14,
+ NODEID_MAX,
+};
+
+extern const int node_id_to_phys_map[NODEID_MAX];
+
void amdgpu_irq_disable_all(struct amdgpu_device *adev);
-irqreturn_t amdgpu_irq_handler(int irq, void *arg);
int amdgpu_irq_init(struct amdgpu_device *adev);
-void amdgpu_irq_fini(struct amdgpu_device *adev);
+void amdgpu_irq_fini_sw(struct amdgpu_device *adev);
+void amdgpu_irq_fini_hw(struct amdgpu_device *adev);
int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source);
@@ -126,5 +146,6 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
int amdgpu_irq_add_domain(struct amdgpu_device *adev);
void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id);
+void amdgpu_restore_msix(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
new file mode 100644
index 000000000000..37270c4dab8d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
@@ -0,0 +1,345 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/mfd/core.h>
+
+#include "amdgpu.h"
+#include "amdgpu_isp.h"
+#include "isp_v4_1_0.h"
+#include "isp_v4_1_1.h"
+
+#define ISP_MC_ADDR_ALIGN (1024 * 32)
+
+/**
+ * isp_hw_init - start and test isp block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int isp_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ if (isp->funcs->hw_init != NULL)
+ return isp->funcs->hw_init(isp);
+
+ return -ENODEV;
+}
+
+/**
+ * isp_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int isp_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_isp *isp = &ip_block->adev->isp;
+
+ if (isp->funcs->hw_fini != NULL)
+ return isp->funcs->hw_fini(isp);
+
+ return -ENODEV;
+}
+
+static int isp_load_fw_by_psp(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *hdr;
+ char ucode_prefix[10];
+ int r = 0;
+
+ /* get isp fw binary name and path */
+ amdgpu_ucode_ip_version_decode(adev, ISP_HWIP, ucode_prefix,
+ sizeof(ucode_prefix));
+
+ /* read isp fw */
+ r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r) {
+ amdgpu_ucode_release(&adev->isp.fw);
+ return r;
+ }
+
+ hdr = (const struct common_firmware_header *)adev->isp.fw->data;
+
+ adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].ucode_id =
+ AMDGPU_UCODE_ID_ISP;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].fw = adev->isp.fw;
+
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ return r;
+}
+
+static int isp_early_init(struct amdgpu_ip_block *ip_block)
+{
+
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ isp_v4_1_0_set_isp_funcs(isp);
+ break;
+ case IP_VERSION(4, 1, 1):
+ isp_v4_1_1_set_isp_funcs(isp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ isp->adev = adev;
+ isp->parent = adev->dev;
+
+ if (isp_load_fw_by_psp(adev)) {
+ DRM_DEBUG_DRIVER("%s: isp fw load failed\n", __func__);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static bool isp_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static int is_valid_isp_device(struct device *isp_parent, struct device *amdgpu_dev)
+{
+ if (isp_parent != amdgpu_dev)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * isp_user_buffer_alloc - create user buffer object (BO) for isp
+ *
+ * @dev: isp device handle
+ * @dmabuf: DMABUF handle for isp buffer allocated in system memory
+ * @buf_obj: GPU buffer object handle to initialize
+ * @buf_addr: GPU addr of the pinned BO to initialize
+ *
+ * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does
+ * GART alloc to generate GPU addr for BO to make it accessible through the
+ * GART aperture for ISP HW.
+ *
+ * This function is exported to allow the V4L2 isp device external to drm device
+ * to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int isp_user_buffer_alloc(struct device *dev, void *dmabuf,
+ void **buf_obj, u64 *buf_addr)
+{
+ struct platform_device *ispdev = to_platform_device(dev);
+ const struct isp_platform_data *isp_pdata;
+ struct amdgpu_device *adev;
+ struct mfd_cell *mfd_cell;
+ struct amdgpu_bo *bo;
+ u64 gpu_addr;
+ int ret;
+
+ if (WARN_ON(!ispdev))
+ return -ENODEV;
+
+ if (WARN_ON(!buf_obj))
+ return -EINVAL;
+
+ if (WARN_ON(!buf_addr))
+ return -EINVAL;
+
+ mfd_cell = &ispdev->mfd_cell[0];
+ if (!mfd_cell)
+ return -ENODEV;
+
+ isp_pdata = mfd_cell->platform_data;
+ adev = isp_pdata->adev;
+
+ ret = is_valid_isp_device(ispdev->dev.parent, adev->dev);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_bo_create_isp_user(adev, dmabuf,
+ AMDGPU_GEM_DOMAIN_GTT, &bo, &gpu_addr);
+ if (ret) {
+ drm_err(&adev->ddev, "failed to alloc gart user buffer (%d)", ret);
+ return ret;
+ }
+
+ *buf_obj = (void *)bo;
+ *buf_addr = gpu_addr;
+
+ return 0;
+}
+EXPORT_SYMBOL(isp_user_buffer_alloc);
+
+/**
+ * isp_user_buffer_free - free isp user buffer object (BO)
+ *
+ * @buf_obj: amdgpu isp user BO to free
+ *
+ * unpin and unref BO for isp internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the isp user BO.
+ */
+void isp_user_buffer_free(void *buf_obj)
+{
+ amdgpu_bo_free_isp_user(buf_obj);
+}
+EXPORT_SYMBOL(isp_user_buffer_free);
+
+/**
+ * isp_kernel_buffer_alloc - create kernel buffer object (BO) for isp
+ *
+ * @dev: isp device handle
+ * @size: size for the new BO
+ * @buf_obj: GPU BO handle to initialize
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: CPU address mapping of BO
+ *
+ * Allocates and pins a kernel BO for internal isp firmware use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to create and access the kernel BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int isp_kernel_buffer_alloc(struct device *dev, u64 size,
+ void **buf_obj, u64 *gpu_addr, void **cpu_addr)
+{
+ struct platform_device *ispdev = to_platform_device(dev);
+ struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj;
+ const struct isp_platform_data *isp_pdata;
+ struct amdgpu_device *adev;
+ struct mfd_cell *mfd_cell;
+ int ret;
+
+ if (WARN_ON(!ispdev))
+ return -ENODEV;
+
+ if (WARN_ON(!buf_obj))
+ return -EINVAL;
+
+ if (WARN_ON(!gpu_addr))
+ return -EINVAL;
+
+ if (WARN_ON(!cpu_addr))
+ return -EINVAL;
+
+ mfd_cell = &ispdev->mfd_cell[0];
+ if (!mfd_cell)
+ return -ENODEV;
+
+ isp_pdata = mfd_cell->platform_data;
+ adev = isp_pdata->adev;
+
+ ret = is_valid_isp_device(ispdev->dev.parent, adev->dev);
+ if (ret)
+ return ret;
+
+ /* Ensure *bo is NULL so a new BO will be created */
+ *bo = NULL;
+ ret = amdgpu_bo_create_kernel(adev,
+ size,
+ ISP_MC_ADDR_ALIGN,
+ AMDGPU_GEM_DOMAIN_GTT,
+ bo,
+ gpu_addr,
+ cpu_addr);
+ if (!cpu_addr || ret) {
+ drm_err(&adev->ddev, "failed to alloc gart kernel buffer (%d)", ret);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(isp_kernel_buffer_alloc);
+
+/**
+ * isp_kernel_buffer_free - free isp kernel buffer object (BO)
+ *
+ * @buf_obj: amdgpu isp user BO to free
+ * @gpu_addr: GPU addr of isp kernel BO
+ * @cpu_addr: CPU addr of isp kernel BO
+ *
+ * unmaps and unpin a isp kernel BO.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the kernel BO.
+ */
+void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr)
+{
+ struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj;
+
+ amdgpu_bo_free_kernel(bo, gpu_addr, cpu_addr);
+}
+EXPORT_SYMBOL(isp_kernel_buffer_free);
+
+static const struct amd_ip_funcs isp_ip_funcs = {
+ .name = "isp_ip",
+ .early_init = isp_early_init,
+ .hw_init = isp_hw_init,
+ .hw_fini = isp_hw_fini,
+ .is_idle = isp_is_idle,
+ .set_clockgating_state = isp_set_clockgating_state,
+ .set_powergating_state = isp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version isp_v4_1_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_ISP,
+ .major = 4,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &isp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version isp_v4_1_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_ISP,
+ .major = 4,
+ .minor = 1,
+ .rev = 1,
+ .funcs = &isp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
new file mode 100644
index 000000000000..d6f4ffa4c97c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __AMDGPU_ISP_H__
+#define __AMDGPU_ISP_H__
+
+#include <drm/amd/isp.h>
+#include <linux/pm_domain.h>
+
+#define ISP_REGS_OFFSET_END 0x629A4
+
+struct amdgpu_isp;
+
+struct isp_funcs {
+ int (*hw_init)(struct amdgpu_isp *isp);
+ int (*hw_fini)(struct amdgpu_isp *isp);
+};
+
+struct amdgpu_isp {
+ struct device *parent;
+ struct amdgpu_device *adev;
+ const struct isp_funcs *funcs;
+ struct mfd_cell *isp_cell;
+ struct resource *isp_res;
+ struct resource *isp_i2c_res;
+ struct resource *isp_gpio_res;
+ struct isp_platform_data *isp_pdata;
+ unsigned int harvest_config;
+ const struct firmware *fw;
+ struct generic_pm_domain ispgpd;
+};
+
+extern const struct amdgpu_ip_block_version isp_v4_1_0_ip_block;
+extern const struct amdgpu_ip_block_version isp_v4_1_1_ip_block;
+
+#endif /* __AMDGPU_ISP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index ff48101bab55..0a0dcbf0798d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -25,100 +25,268 @@
#include <linux/wait.h>
#include <linux/sched.h>
+#include <drm/drm_drv.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dev_coredump.h"
+#include "amdgpu_xgmi.h"
+
+static void amdgpu_job_do_core_dump(struct amdgpu_device *adev,
+ struct amdgpu_job *job)
+{
+ int i;
+
+ dev_info(adev->dev, "Dumping IP State\n");
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->funcs->dump_ip_state)
+ adev->ip_blocks[i].version->funcs
+ ->dump_ip_state((void *)&adev->ip_blocks[i]);
+ dev_info(adev->dev, "Dumping IP State Completed\n");
+
+ amdgpu_coredump(adev, true, false, job);
+}
-static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static void amdgpu_job_core_dump(struct amdgpu_device *adev,
+ struct amdgpu_job *job)
+{
+ struct list_head device_list, *device_list_handle = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_hive_info *hive = NULL;
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+ /*
+ * Reuse the logic in amdgpu_device_gpu_recover() to build list of
+ * devices for code dump
+ */
+ INIT_LIST_HEAD(&device_list);
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ if (!list_is_first(&adev->reset_list, &device_list))
+ list_rotate_to_front(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ } else {
+ list_add_tail(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ }
+
+ /* Do the coredump for each device */
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list)
+ amdgpu_job_do_core_dump(tmp_adev, job);
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+}
+
+static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
- struct amdgpu_task_info ti;
+ struct drm_wedge_task_info *info = NULL;
+ struct amdgpu_task_info *ti = NULL;
struct amdgpu_device *adev = ring->adev;
+ int idx, r;
- memset(&ti, 0, sizeof(struct amdgpu_task_info));
+ if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
+ dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s",
+ __func__, s_job->sched->name);
+
+ /* Effectively the job is aborted as the device is gone */
+ return DRM_GPU_SCHED_STAT_ENODEV;
+ }
+
+ /*
+ * Do the coredump immediately after a job timeout to get a very
+ * close dump/snapshot/representation of GPU's current error status
+ * Skip it for SRIOV, since VF FLR will be triggered by host driver
+ * before job timeout
+ */
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_job_core_dump(adev, job);
if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
- DRM_ERROR("ring %s timeout, but soft recovered\n",
- s_job->sched->name);
- return;
+ dev_err(adev->dev, "ring %s timeout, but soft recovered\n",
+ s_job->sched->name);
+ goto exit;
+ }
+
+ dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n",
+ job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
+ ring->fence_drv.sync_seq);
+
+ ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+ if (ti) {
+ amdgpu_vm_print_task_info(adev, ti);
+ info = &ti->task;
+ }
+
+ /* attempt a per ring reset */
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+ ring->funcs->reset) {
+ dev_err(adev->dev, "Starting %s ring reset\n",
+ s_job->sched->name);
+ r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
+ if (!r) {
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ dev_err(adev->dev, "Ring %s reset succeeded\n",
+ ring->sched.name);
+ drm_dev_wedged_event(adev_to_drm(adev),
+ DRM_WEDGE_RECOVERY_NONE, info);
+ goto exit;
+ }
+ dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
}
- amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
- DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
- job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
- ring->fence_drv.sync_seq);
- DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
- ti.process_name, ti.tgid, ti.task_name, ti.pid);
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
- amdgpu_device_gpu_recover(ring->adev, job);
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_JOB;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ /*
+ * To avoid an unnecessary extra coredump, as we have already
+ * got the very close representation of GPU's error status
+ */
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
+ if (r)
+ dev_err(adev->dev, "GPU Recovery Failed: %d\n", r);
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))
adev->virt.tdr_debug = true;
}
+
+exit:
+ amdgpu_vm_put_task_info(ti);
+ drm_dev_exit(idx);
+ return DRM_GPU_SCHED_STAT_RESET;
}
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
- struct amdgpu_job **job, struct amdgpu_vm *vm)
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct drm_sched_entity *entity, void *owner,
+ unsigned int num_ibs, struct amdgpu_job **job,
+ u64 drm_client_id)
{
- size_t size = sizeof(struct amdgpu_job);
+ struct amdgpu_fence *af;
+ int r;
if (num_ibs == 0)
return -EINVAL;
- size += sizeof(struct amdgpu_ib) * num_ibs;
-
- *job = kzalloc(size, GFP_KERNEL);
+ *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
if (!*job)
return -ENOMEM;
- /*
- * Initialize the scheduler to at least some ring so that we always
- * have a pointer to adev.
- */
- (*job)->base.sched = &adev->rings[0]->sched;
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_job;
+ }
+ (*job)->hw_fence = af;
+
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_fence;
+ }
+ (*job)->hw_vm_fence = af;
+
(*job)->vm = vm;
- (*job)->ibs = (void *)&(*job)[1];
- (*job)->num_ibs = num_ibs;
- amdgpu_sync_create(&(*job)->sync);
- amdgpu_sync_create(&(*job)->sched_sync);
- (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ amdgpu_sync_create(&(*job)->explicit_sync);
+ (*job)->generation = amdgpu_vm_generation(adev, vm);
(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
- return 0;
+ if (!entity)
+ return 0;
+
+ return drm_sched_job_init(&(*job)->base, entity, 1, owner,
+ drm_client_id);
+
+err_fence:
+ kfree((*job)->hw_fence);
+err_job:
+ kfree(*job);
+ *job = NULL;
+
+ return r;
}
-int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- enum amdgpu_ib_pool_type pool_type,
- struct amdgpu_job **job)
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity, void *owner,
+ size_t size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job, u64 k_job_id)
{
int r;
- r = amdgpu_job_alloc(adev, 1, job, NULL);
+ r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job,
+ k_job_id);
if (r)
return r;
+ (*job)->num_ibs = 1;
r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
- if (r)
+ if (r) {
+ if (entity)
+ drm_sched_job_cleanup(&(*job)->base);
+ kfree((*job)->hw_vm_fence);
+ kfree((*job)->hw_fence);
kfree(*job);
+ *job = NULL;
+ }
return r;
}
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa)
+{
+ if (gds) {
+ job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+ job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
+ }
+ if (gws) {
+ job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+ job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+ }
+ if (oa) {
+ job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+ job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
+ }
+}
+
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
unsigned i;
- /* use sched fence if available */
- f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;
+ /* Check if any fences were initialized */
+ if (job->base.s_fence && job->base.s_fence->finished.ops)
+ f = &job->base.s_fence->finished;
+ else if (job->hw_fence && job->hw_fence->base.ops)
+ f = &job->hw_fence->base;
+ else
+ f = NULL;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(ring->adev, &job->ibs[i], f);
+ amdgpu_ib_free(&job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
@@ -127,39 +295,68 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
drm_sched_job_cleanup(s_job);
- dma_fence_put(job->fence);
- amdgpu_sync_free(&job->sync);
- amdgpu_sync_free(&job->sched_sync);
+ amdgpu_sync_free(&job->explicit_sync);
+
+ if (job->hw_fence->base.ops)
+ dma_fence_put(&job->hw_fence->base);
+ else
+ kfree(job->hw_fence);
+ if (job->hw_vm_fence->base.ops)
+ dma_fence_put(&job->hw_vm_fence->base);
+ else
+ kfree(job->hw_vm_fence);
+
kfree(job);
}
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader)
+{
+ struct dma_fence *fence = &leader->base.s_fence->scheduled;
+
+ WARN_ON(job->gang_submit);
+
+ /*
+ * Don't add a reference when we are the gang leader to avoid circle
+ * dependency.
+ */
+ if (job != leader)
+ dma_fence_get(fence);
+ job->gang_submit = fence;
+}
+
void amdgpu_job_free(struct amdgpu_job *job)
{
+ if (job->base.entity)
+ drm_sched_job_cleanup(&job->base);
+
amdgpu_job_free_resources(job);
+ amdgpu_sync_free(&job->explicit_sync);
+ if (job->gang_submit != &job->base.s_fence->scheduled)
+ dma_fence_put(job->gang_submit);
+
+ if (job->hw_fence->base.ops)
+ dma_fence_put(&job->hw_fence->base);
+ else
+ kfree(job->hw_fence);
+ if (job->hw_vm_fence->base.ops)
+ dma_fence_put(&job->hw_vm_fence->base);
+ else
+ kfree(job->hw_vm_fence);
- dma_fence_put(job->fence);
- amdgpu_sync_free(&job->sync);
- amdgpu_sync_free(&job->sched_sync);
kfree(job);
}
-int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
- void *owner, struct dma_fence **f)
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
{
- int r;
-
- if (!f)
- return -EINVAL;
-
- r = drm_sched_job_init(&job->base, entity, owner);
- if (r)
- return r;
+ struct dma_fence *f;
- *f = dma_fence_get(&job->base.s_fence->finished);
+ drm_sched_job_arm(&job->base);
+ f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
- drm_sched_entity_push_job(&job->base, entity);
+ drm_sched_entity_push_job(&job->base);
- return 0;
+ return f;
}
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
@@ -168,8 +365,8 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
int r;
job->base.sched = &ring->sched;
- r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
- job->fence = dma_fence_get(*fence);
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
+
if (r)
return r;
@@ -177,38 +374,49 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
return 0;
}
-static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
- struct drm_sched_entity *s_entity)
+static struct dma_fence *
+amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
+ struct drm_sched_entity *s_entity)
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
- struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence;
int r;
- fence = amdgpu_sync_get_fence(&job->sync);
- if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(&job->sched_sync, fence);
- if (r)
- DRM_ERROR("Error adding fence (%d)\n", r);
+ r = drm_sched_entity_error(s_entity);
+ if (r)
+ goto error;
+
+ if (job->gang_submit) {
+ fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+ if (fence)
+ return fence;
}
- while (fence == NULL && vm && !job->vmid) {
- r = amdgpu_vmid_grab(vm, ring, &job->sync,
- &job->base.s_fence->finished,
- job);
- if (r)
- DRM_ERROR("Error getting VM ID (%d)\n", r);
+ fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
+ if (fence)
+ return fence;
- fence = amdgpu_sync_get_fence(&job->sync);
+ if (job->vm && !job->vmid) {
+ r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
+ if (r) {
+ dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
+ goto error;
+ }
+ return fence;
}
- return fence;
+ return NULL;
+
+error:
+ dma_fence_set_error(&job->base.s_fence->finished, r);
+ return NULL;
}
static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
+ struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
int r = 0;
@@ -216,33 +424,50 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
- BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
-
trace_amdgpu_sched_run_job(job);
- if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
- dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
+ /* Skip job if VRAM is lost and never resubmit gangs */
+ if (job->generation != amdgpu_vm_generation(adev, job->vm) ||
+ (job->job_run_counter && job->gang_submit))
+ dma_fence_set_error(finished, -ECANCELED);
if (finished->error < 0) {
- DRM_INFO("Skip scheduling IBs!\n");
+ dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+ ring->name);
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev,
+ "Error scheduling IBs (%d) in ring(%s)", r,
+ ring->name);
}
- /* if gpu reset, hw fence will be replaced here */
- dma_fence_put(job->fence);
- job->fence = dma_fence_get(fence);
+ job->job_run_counter++;
amdgpu_job_free_resources(job);
fence = r ? ERR_PTR(r) : fence;
return fence;
}
-#define to_drm_sched_job(sched_job) \
- container_of((sched_job), struct drm_sched_job, queue_node)
+/*
+ * This is a duplicate function from DRM scheduler sched_internal.h.
+ * Plan is to remove it when amdgpu_job_stop_all_jobs_on_sched is removed, due
+ * latter being incorrect and racy.
+ *
+ * See https://lore.kernel.org/amd-gfx/44edde63-7181-44fb-a4f7-94e50514f539@amd.com/
+ */
+static struct drm_sched_job *
+drm_sched_entity_queue_pop(struct drm_sched_entity *entity)
+{
+ struct spsc_node *node;
+
+ node = spsc_queue_pop(&entity->job_queue);
+ if (!node)
+ return NULL;
+
+ return container_of(node, struct drm_sched_job, queue_node);
+}
void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
{
@@ -251,15 +476,11 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
int i;
/* Signal all jobs not yet scheduled */
- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
- struct drm_sched_rq *rq = &sched->sched_rq[i];
-
- if (!rq)
- continue;
-
+ for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
+ struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
- while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+ while ((s_job = drm_sched_entity_queue_pop(s_entity))) {
struct drm_sched_fence *s_fence = s_job->s_fence;
dma_fence_signal(&s_fence->scheduled);
@@ -280,7 +501,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
}
const struct drm_sched_backend_ops amdgpu_sched_ops = {
- .dependency = amdgpu_job_dependency,
+ .prepare_job = amdgpu_job_prepare_job,
.run_job = amdgpu_job_run,
.timedout_job = amdgpu_job_timedout,
.free_job = amdgpu_job_free_cb
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 81caac9b958a..7abf069d17d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -23,6 +23,10 @@
#ifndef __AMDGPU_JOB_H__
#define __AMDGPU_JOB_H__
+#include <drm/gpu_scheduler.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+
/* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0)
/* bit set means preamble IB is first presented in belonging context */
@@ -40,38 +44,84 @@
struct amdgpu_fence;
enum amdgpu_ib_pool_type;
+/* Internal kernel job ids. (decreasing values, starting from U64_MAX). */
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE (18446744073709551615ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES (18446744073709551614ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE (18446744073709551613ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR (18446744073709551612ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER (18446744073709551611ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA (18446744073709551610ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER (18446744073709551609ULL)
+#define AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE (18446744073709551608ULL)
+#define AMDGPU_KERNEL_JOB_ID_MOVE_BLIT (18446744073709551607ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER (18446744073709551606ULL)
+#define AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER (18446744073709551605ULL)
+#define AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB (18446744073709551604ULL)
+#define AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP (18446744073709551603ULL)
+#define AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST (18446744073709551602ULL)
+
struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_vm *vm;
- struct amdgpu_sync sync;
- struct amdgpu_sync sched_sync;
- struct amdgpu_ib *ibs;
- struct dma_fence *fence; /* the hw fence */
+ struct amdgpu_sync explicit_sync;
+ struct amdgpu_fence *hw_fence;
+ struct amdgpu_fence *hw_vm_fence;
+ struct dma_fence *gang_submit;
uint32_t preamble_status;
uint32_t preemption_status;
- uint32_t num_ibs;
bool vm_needs_flush;
+ bool gds_switch_needed;
+ bool spm_update_needed;
uint64_t vm_pd_addr;
unsigned vmid;
unsigned pasid;
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
- uint32_t vram_lost_counter;
+ uint64_t generation;
/* user fence handling */
uint64_t uf_addr;
uint64_t uf_sequence;
+
+ /* virtual addresses for shadow/GDS/CSA */
+ uint64_t shadow_va;
+ uint64_t csa_va;
+ uint64_t gds_va;
+ bool init_shadow;
+
+ /* job_run_counter >= 1 means a resubmit job */
+ uint32_t job_run_counter;
+
+ /* enforce isolation */
+ bool enforce_isolation;
+ bool run_cleaner_shader;
+
+ uint32_t num_ibs;
+ struct amdgpu_ib ibs[];
};
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
- struct amdgpu_job **job, struct amdgpu_vm *vm);
-int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- enum amdgpu_ib_pool_type pool, struct amdgpu_job **job);
+static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)
+{
+ return to_amdgpu_ring(job->base.entity->rq->sched);
+}
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct drm_sched_entity *entity, void *owner,
+ unsigned int num_ibs, struct amdgpu_job **job,
+ u64 drm_client_id);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity, void *owner,
+ size_t size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job,
+ u64 k_job_id);
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa);
void amdgpu_job_free_resources(struct amdgpu_job *job);
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader);
void amdgpu_job_free(struct amdgpu_job *job);
-int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
- void *owner, struct dma_fence **f);
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job);
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 8996cb4ed57a..63ee6ba6a931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -33,29 +33,62 @@
#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)
static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev);
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
{
+ int i, r;
+
INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
mutex_init(&adev->jpeg.jpeg_pg_lock);
atomic_set(&adev->jpeg.total_submission_cnt, 0);
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG))
+ adev->jpeg.indirect_sram = true;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1U << i))
+ continue;
+
+ if (adev->jpeg.indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->jpeg.inst[i].dpg_sram_bo,
+ &adev->jpeg.inst[i].dpg_sram_gpu_addr,
+ &adev->jpeg.inst[i].dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev,
+ "JPEG %d (%d) failed to allocate DPG bo\n", i, r);
+ return r;
+ }
+ }
+ }
+
return 0;
}
int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
{
- int i;
-
- cancel_delayed_work_sync(&adev->jpeg.idle_work);
+ int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
- amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
+ amdgpu_bo_free_kernel(
+ &adev->jpeg.inst[i].dpg_sram_bo,
+ &adev->jpeg.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->jpeg.inst[i].dpg_sram_cpu_addr);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
}
+ if (adev->jpeg.reg_list)
+ amdgpu_jpeg_reg_dump_fini(adev);
+
mutex_destroy(&adev->jpeg.jpeg_pg_lock);
return 0;
@@ -78,19 +111,22 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, jpeg.idle_work.work);
unsigned int fences = 0;
- unsigned int i;
+ unsigned int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
- fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]);
}
- if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
+ if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) {
+ mutex_lock(&adev->jpeg.jpeg_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
AMD_PG_STATE_GATE);
- else
+ mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+ } else
schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
}
@@ -120,18 +156,25 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
+ /* JPEG in SRIOV does not support direct register read/write */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0));
- amdgpu_ring_write(ring, 0xDEADBEEF);
+ WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD);
+ /* Add a read register to make sure the write register is executed. */
+ RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+
+ amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
+ amdgpu_ring_write(ring, 0xABADCAFE);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xABADCAFE)
break;
udelay(1);
}
@@ -152,14 +195,15 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
const unsigned ib_size_dw = 16;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
ib = &job->ibs[0];
- ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
+ ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0);
ib->ptr[1] = 0xDEADBEEF;
for (i = 2; i < 16; i += 2) {
ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
@@ -204,17 +248,360 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = 0;
}
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
- break;
- udelay(1);
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ if (amdgpu_emu_mode == 1)
+ udelay(10);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
}
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
-
dma_fence_put(fence);
error:
return r;
}
+
+int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->jpeg.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+
+ return 0;
+}
+
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i) ||
+ !adev->jpeg.inst[i].ras_poison_irq.funcs)
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_jpeg_ras *ras;
+
+ if (!adev->jpeg.ras)
+ return 0;
+
+ ras = adev->jpeg.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register jpeg ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "jpeg");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->jpeg.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init;
+
+ return 0;
+}
+
+int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = AMDGPU_UCODE_ID_JPEG_RAM,
+ .mc_addr = adev->jpeg.inst[inst_idx].dpg_sram_gpu_addr,
+ .ucode_size = ((uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr),
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+/*
+ * debugfs for to enable/disable jpeg job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i, j;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << (adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings)) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (val & (BIT_ULL((i * adev->jpeg.num_jpeg_rings) + j)))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_jpeg_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i, j;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->sched.ready)
+ mask |= 1ULL << ((i * adev->jpeg.num_jpeg_rings) + j);
+ }
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_jpeg_sched_mask_fops,
+ amdgpu_debugfs_jpeg_sched_mask_get,
+ amdgpu_debugfs_jpeg_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->jpeg.num_jpeg_inst > 1) && !(adev->jpeg.num_jpeg_rings > 1))
+ return;
+ sprintf(name, "amdgpu_jpeg_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_jpeg_sched_mask_fops);
+#endif
+}
+
+static ssize_t amdgpu_get_jpeg_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->jpeg.supported_reset);
+}
+
+static DEVICE_ATTR(jpeg_reset_mask, 0444,
+ amdgpu_get_jpeg_reset_mask, NULL);
+
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->jpeg.num_jpeg_inst) {
+ r = device_create_file(adev->dev, &dev_attr_jpeg_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->jpeg.num_jpeg_inst)
+ device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
+ }
+}
+
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+ adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count,
+ sizeof(uint32_t), GFP_KERNEL);
+ if (!adev->jpeg.ip_dump) {
+ dev_err(adev->dev,
+ "Failed to allocate memory for JPEG IP Dump\n");
+ return -ENOMEM;
+ }
+ adev->jpeg.reg_list = reg;
+ adev->jpeg.reg_count = count;
+
+ return 0;
+}
+
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->jpeg.ip_dump);
+ adev->jpeg.reg_list = NULL;
+ adev->jpeg.reg_count = 0;
+}
+
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst_off, inst_id, is_powered;
+ int i, j;
+
+ if (!adev->jpeg.ip_dump)
+ return;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ inst_id = GET_INST(JPEG, i);
+ inst_off = i * adev->jpeg.reg_count;
+ /* check power status from UVD_JPEG_POWER_STATUS */
+ adev->jpeg.ip_dump[inst_off] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[0],
+ inst_id));
+ is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+ if (is_powered)
+ for (j = 1; j < adev->jpeg.reg_count; j++)
+ adev->jpeg.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[j],
+ inst_id));
+ }
+}
+
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst_off, is_powered;
+ int i, j;
+
+ if (!adev->jpeg.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:JPEG%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * adev->jpeg.reg_count;
+ is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+ if (is_powered) {
+ drm_printf(p, "Active Instance:JPEG%d\n", i);
+ for (j = 0; j < adev->jpeg.reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", adev->jpeg.reg_list[j].reg_name,
+ adev->jpeg.ip_dump[inst_off + j]);
+ } else
+ drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
+ }
+}
+
+static inline bool amdgpu_jpeg_reg_valid(u32 reg)
+{
+ if (reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END ||
+ (reg >= JPEG_ATOMIC_RANGE_START && reg <= JPEG_ATOMIC_RANGE_END))
+ return false;
+ else
+ return true;
+}
+
+/**
+ * amdgpu_jpeg_dec_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ u32 i, reg, res, cond, type;
+ struct amdgpu_device *adev = parser->adev;
+
+ for (i = 0; i < ib->length_dw ; i += 2) {
+ reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+ res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+ cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+ type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+ if (res) /* only support 0 at the moment */
+ return -EINVAL;
+
+ switch (type) {
+ case PACKETJ_TYPE0:
+ if (cond != PACKETJ_CONDITION_CHECK0 ||
+ !amdgpu_jpeg_reg_valid(reg)) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE3:
+ if (cond != PACKETJ_CONDITION_CHECK3 ||
+ !amdgpu_jpeg_reg_valid(reg)) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE6:
+ if (ib->ptr[i] == CP_PACKETJ_NOP)
+ continue;
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ default:
+ dev_err(adev->dev, "Unknown packet type %d !\n", type);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 55fbff2be761..346ae0ab09d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -24,30 +24,131 @@
#ifndef __AMDGPU_JPEG_H__
#define __AMDGPU_JPEG_H__
-#define AMDGPU_MAX_JPEG_INSTANCES 2
+#include "amdgpu_ras.h"
+#include "amdgpu_cs.h"
+
+#define AMDGPU_MAX_JPEG_INSTANCES 4
+#define AMDGPU_MAX_JPEG_RINGS 10
+#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8
+
+#define JPEG_REG_RANGE_START 0x4000
+#define JPEG_REG_RANGE_END 0x41c2
+#define JPEG_ATOMIC_RANGE_START 0x4120
+#define JPEG_ATOMIC_RANGE_END 0x412A
+
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
+#define WREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ JPEG, GET_INST(JPEG, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \
+ indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } else { \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define RREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, mask_en) \
+ ({ \
+ WREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \
+ do { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \
+ WREG32_SOC15( \
+ JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \
+ indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
+#define RREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, mask_en) \
+ do { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(JPEG, inst_idx, regUVD_DPG_LMA_DATA); \
+ } while (0)
+
+#define ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, offset, value, indirect) \
+ do { \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value; \
+ } while (0)
+
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_jpeg_caps {
+ AMDGPU_JPEG_RRMT_ENABLED,
+};
+
+#define AMDGPU_JPEG_CAPS(caps) BIT(AMDGPU_JPEG_##caps)
+
struct amdgpu_jpeg_reg{
- unsigned jpeg_pitch;
+ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
};
struct amdgpu_jpeg_inst {
- struct amdgpu_ring ring_dec;
+ struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS];
struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_jpeg_reg external;
+ struct amdgpu_bo *dpg_sram_bo;
+ struct dpg_pause_state pause_state;
+ void *dpg_sram_cpu_addr;
+ uint64_t dpg_sram_gpu_addr;
+ uint32_t *dpg_sram_curr_addr;
+ uint8_t aid_id;
+};
+
+struct amdgpu_jpeg_ras {
+ struct amdgpu_ras_block_object ras_block;
};
struct amdgpu_jpeg {
uint8_t num_jpeg_inst;
struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
+ unsigned num_jpeg_rings;
struct amdgpu_jpeg_reg internal;
unsigned harvest_config;
struct delayed_work idle_work;
enum amd_powergating_state cur_state;
struct mutex jpeg_pg_lock;
atomic_t total_submission_cnt;
+ struct ras_common_if *ras_if;
+ struct amdgpu_jpeg_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
+ bool indirect_sram;
+ uint32_t supported_reset;
+ uint32_t caps;
+ u32 *ip_dump;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
};
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
@@ -61,4 +162,23 @@ void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring);
int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id);
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+
#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 64beb3399604..6ee77f431d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -27,8 +27,9 @@
*/
#include "amdgpu.h"
-#include <drm/drm_debugfs.h>
#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "atom.h"
@@ -42,6 +43,9 @@
#include "amdgpu_gem.h"
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amd_pcie.h"
+#include "amdgpu_userq.h"
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
@@ -87,13 +91,11 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (adev->rmmio == NULL)
return;
- if (adev->runpm) {
- pm_runtime_get_sync(dev->dev);
- pm_runtime_forbid(dev->dev);
- }
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD))
+ DRM_WARN("smart shift update failed\n");
amdgpu_acpi_fini(adev);
- amdgpu_device_fini(adev);
+ amdgpu_device_fini_hw(adev);
}
void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
@@ -133,21 +135,10 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
{
struct drm_device *dev;
- struct pci_dev *parent;
int r, acpi_status;
dev = adev_to_drm(adev);
- if (amdgpu_has_atpx() &&
- (amdgpu_is_atpx_hybrid() ||
- amdgpu_has_atpx_dgpu_power_cntl()) &&
- ((flags & AMD_IS_APU) == 0) &&
- !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
- flags |= AMD_IS_PX;
-
- parent = pci_upstream_bridge(adev->pdev);
- adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
-
/* amdgpu_device_init should report only fatal error
* like memory allocation failure or iomapping failure,
* or memory manager initialization failure, it must
@@ -160,36 +151,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
goto out;
}
- if (amdgpu_device_supports_atpx(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */
- adev->runpm = true;
- dev_info(adev->dev, "Using ATPX for runtime pm\n");
- } else if (amdgpu_device_supports_boco(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
- adev->runpm = true;
- dev_info(adev->dev, "Using BOCO for runtime pm\n");
- } else if (amdgpu_device_supports_baco(dev) &&
- (amdgpu_runtime_pm != 0)) {
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- /* enable runpm if runpm=1 */
- if (amdgpu_runtime_pm > 0)
- adev->runpm = true;
- break;
- case CHIP_VEGA10:
- /* turn runpm on if noretry=0 */
- if (!adev->gmc.noretry)
- adev->runpm = true;
- break;
- default:
- /* enable runpm on CI+ */
- adev->runpm = true;
- break;
- }
- if (adev->runpm)
- dev_info(adev->dev, "Using BACO for runtime pm\n");
- }
+ amdgpu_device_detect_runtime_pm_mode(adev);
/* Call ACPI methods: require modeset init
* but failure is not fatal
@@ -199,29 +161,54 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
if (acpi_status)
dev_dbg(dev->dev, "Error during ACPI methods call\n");
- if (adev->runpm) {
- /* only need to skip on ATPX */
- if (amdgpu_device_supports_atpx(dev) &&
- !amdgpu_is_atpx_hybrid())
- dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
- pm_runtime_use_autosuspend(dev->dev);
- pm_runtime_set_autosuspend_delay(dev->dev, 5000);
- pm_runtime_allow(dev->dev);
- pm_runtime_mark_last_busy(dev->dev);
- pm_runtime_put_autosuspend(dev->dev);
- }
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD))
+ DRM_WARN("smart shift update failed\n");
out:
- if (r) {
- /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
- if (adev->rmmio && adev->runpm)
- pm_runtime_put_noidle(dev->dev);
+ if (r)
amdgpu_driver_unload_kms(dev);
- }
return r;
}
+static enum amd_ip_block_type
+ amdgpu_ip_get_block_type(struct amdgpu_device *adev, uint32_t ip)
+{
+ enum amd_ip_block_type type;
+
+ switch (ip) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ default:
+ type = AMD_IP_BLOCK_TYPE_NUM;
+ break;
+ }
+
+ return type;
+}
+
static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
struct drm_amdgpu_query_fw *query_fw,
struct amdgpu_device *adev)
@@ -271,6 +258,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->gfx.rlc_srls_fw_version;
fw_info->feature = adev->gfx.rlc_srls_feature_version;
break;
+ case AMDGPU_INFO_FW_GFX_RLCP:
+ fw_info->ver = adev->gfx.rlcp_ucode_version;
+ fw_info->feature = adev->gfx.rlcp_ucode_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLCV:
+ fw_info->ver = adev->gfx.rlcv_ucode_version;
+ fw_info->feature = adev->gfx.rlcv_ucode_feature_version;
+ break;
case AMDGPU_INFO_FW_GFX_MEC:
if (query_fw->index == 0) {
fw_info->ver = adev->gfx.mec_fw_version;
@@ -287,21 +282,36 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
break;
case AMDGPU_INFO_FW_TA:
switch (query_fw->index) {
- case 0:
- fw_info->ver = adev->psp.ta_fw_version;
- fw_info->feature = adev->psp.ta_xgmi_ucode_version;
+ case TA_FW_TYPE_PSP_XGMI:
+ fw_info->ver = adev->psp.xgmi_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.xgmi_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_RAS:
+ fw_info->ver = adev->psp.ras_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.ras_context.context
+ .bin_desc.feature_version;
break;
- case 1:
- fw_info->ver = adev->psp.ta_fw_version;
- fw_info->feature = adev->psp.ta_ras_ucode_version;
+ case TA_FW_TYPE_PSP_HDCP:
+ fw_info->ver = adev->psp.hdcp_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.hdcp_context.context
+ .bin_desc.feature_version;
break;
- case 2:
- fw_info->ver = adev->psp.ta_fw_version;
- fw_info->feature = adev->psp.ta_hdcp_ucode_version;
+ case TA_FW_TYPE_PSP_DTM:
+ fw_info->ver = adev->psp.dtm_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.dtm_context.context
+ .bin_desc.feature_version;
break;
- case 3:
- fw_info->ver = adev->psp.ta_fw_version;
- fw_info->feature = adev->psp.ta_dtm_ucode_version;
+ case TA_FW_TYPE_PSP_RAP:
+ fw_info->ver = adev->psp.rap_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.rap_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_SECUREDISPLAY:
+ fw_info->ver = adev->psp.securedisplay_context.context.bin_desc.fw_version;
+ fw_info->feature =
+ adev->psp.securedisplay_context.context.bin_desc
+ .feature_version;
break;
default:
return -EINVAL;
@@ -314,12 +324,12 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
break;
case AMDGPU_INFO_FW_SOS:
- fw_info->ver = adev->psp.sos_fw_version;
- fw_info->feature = adev->psp.sos_feature_version;
+ fw_info->ver = adev->psp.sos.fw_version;
+ fw_info->feature = adev->psp.sos.feature_version;
break;
case AMDGPU_INFO_FW_ASD:
- fw_info->ver = adev->psp.asd_fw_version;
- fw_info->feature = adev->psp.asd_feature_version;
+ fw_info->ver = adev->psp.asd_context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.asd_context.bin_desc.feature_version;
break;
case AMDGPU_INFO_FW_DMCU:
fw_info->ver = adev->dm.dmcu_fw_version;
@@ -330,8 +340,30 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->feature = 0;
break;
case AMDGPU_INFO_FW_TOC:
- fw_info->ver = adev->psp.toc_fw_version;
- fw_info->feature = adev->psp.toc_feature_version;
+ fw_info->ver = adev->psp.toc.fw_version;
+ fw_info->feature = adev->psp.toc.feature_version;
+ break;
+ case AMDGPU_INFO_FW_CAP:
+ fw_info->ver = adev->psp.cap_fw_version;
+ fw_info->feature = adev->psp.cap_feature_version;
+ break;
+ case AMDGPU_INFO_FW_MES_KIQ:
+ fw_info->ver = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ fw_info->feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
+ >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+ break;
+ case AMDGPU_INFO_FW_MES:
+ fw_info->ver = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ fw_info->feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
+ >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+ break;
+ case AMDGPU_INFO_FW_IMU:
+ fw_info->ver = adev->gfx.imu_fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_VPE:
+ fw_info->ver = adev->vpe.fw_version;
+ fw_info->feature = adev->vpe.feature_version;
break;
default:
return -EINVAL;
@@ -339,6 +371,26 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
return 0;
}
+static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_gfx *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow = {};
+
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
+ meta->shadow_size = shadow.shadow_size;
+ meta->shadow_alignment = shadow.shadow_alignment;
+ meta->csa_size = shadow.csa_size;
+ meta->csa_alignment = shadow.csa_alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
struct drm_amdgpu_info *info,
struct drm_amdgpu_info_hw_ip *result)
@@ -347,6 +399,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
uint32_t ib_size_alignment = 0;
enum amd_ip_block_type type;
unsigned int num_rings = 0;
+ uint32_t num_slots = 0;
unsigned int i, j;
if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
@@ -356,24 +409,45 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- if (adev->gfx.gfx_ring[i].sched.ready)
+ if (adev->gfx.gfx_ring[i].sched.ready &&
+ !adev->gfx.gfx_ring[i].no_user_submission)
++num_rings;
+
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
+ num_slots += hweight32(adev->mes.gfx_hqd_mask[i]);
+ }
+
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
- if (adev->gfx.compute_ring[i].sched.ready)
+ if (adev->gfx.compute_ring[i].sched.ready &&
+ !adev->gfx.compute_ring[i].no_user_submission)
++num_rings;
+
+ if (!adev->sdma.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++)
+ num_slots += hweight32(adev->mes.compute_hqd_mask[i]);
+ }
+
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
- if (adev->sdma.instance[i].ring.sched.ready)
+ if (adev->sdma.instance[i].ring.sched.ready &&
+ !adev->sdma.instance[i].ring.no_user_submission)
++num_rings;
+
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
+ num_slots += hweight32(adev->mes.sdma_hqd_mask[i]);
+ }
+
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
@@ -383,19 +457,21 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
- if (adev->uvd.inst[i].ring.sched.ready)
+ if (adev->uvd.inst[i].ring.sched.ready &&
+ !adev->uvd.inst[i].ring.no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
+ ib_start_alignment = 256;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
- if (adev->vce.ring[i].sched.ready)
+ if (adev->vce.ring[i].sched.ready &&
+ !adev->vce.ring[i].no_user_submission)
++num_rings;
- ib_start_alignment = 4;
- ib_size_alignment = 1;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
@@ -404,36 +480,39 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
- if (adev->uvd.inst[i].ring_enc[j].sched.ready)
+ if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
+ !adev->uvd.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
- ib_size_alignment = 64;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->uvd.harvest_config & (1 << i))
+ if (adev->vcn.harvest_config & (1 << i))
continue;
- if (adev->vcn.inst[i].ring_dec.sched.ready)
+ if (adev->vcn.inst[i].ring_dec.sched.ready &&
+ !adev->vcn.inst[i].ring_dec.no_user_submission)
++num_rings;
}
- ib_start_alignment = 16;
- ib_size_alignment = 16;
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->uvd.harvest_config & (1 << i))
+ if (adev->vcn.harvest_config & (1 << i))
continue;
- for (j = 0; j < adev->vcn.num_enc_rings; j++)
- if (adev->vcn.inst[i].ring_enc[j].sched.ready)
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
+ if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
+ !adev->vcn.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
- ib_size_alignment = 1;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
@@ -443,11 +522,21 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->jpeg.harvest_config & (1 << i))
continue;
- if (adev->jpeg.inst[i].ring_dec.sched.ready)
- ++num_rings;
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
+ if (adev->jpeg.inst[i].ring_dec[j].sched.ready &&
+ !adev->jpeg.inst[i].ring_dec[j].no_user_submission)
+ ++num_rings;
}
- ib_start_alignment = 16;
- ib_size_alignment = 16;
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VPE:
+ type = AMD_IP_BLOCK_TYPE_VPE;
+ if (adev->vpe.ring.sched.ready &&
+ !adev->vpe.ring.no_user_submission)
+ ++num_rings;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
default:
return -EINVAL;
@@ -466,8 +555,41 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
result->hw_ip_version_major = adev->ip_blocks[i].version->major;
result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, GC_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, SDMA0_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ case AMD_IP_BLOCK_TYPE_VCN:
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, UVD_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_VCE:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCE_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_VPE:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0));
+ break;
+ default:
+ result->ip_discovery_version = 0;
+ break;
+ }
+ } else {
+ result->ip_discovery_version = 0;
+ }
result->capabilities_flags = 0;
result->available_rings = (1 << num_rings) - 1;
+ result->userq_num_slots = num_slots;
result->ib_start_alignment = ib_start_alignment;
result->ib_size_alignment = ib_size_alignment;
return 0;
@@ -494,11 +616,16 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct drm_amdgpu_info *info = data;
struct amdgpu_mode_info *minfo = &adev->mode_info;
void __user *out = (void __user *)(uintptr_t)info->return_pointer;
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ip_block *ip_block;
+ enum amd_ip_block_type type;
+ struct amdgpu_xcp *xcp;
+ u32 count, inst_mask;
uint32_t size = info->return_size;
struct drm_crtc *crtc;
uint32_t ui32 = 0;
uint64_t ui64 = 0;
- int i, found;
+ int i, found, ret;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
@@ -513,6 +640,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
crtc = (struct drm_crtc *)minfo->crtcs[i];
if (crtc && crtc->base.id == info->mode_crtc.id) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
ui32 = amdgpu_crtc->crtc_id;
found = 1;
break;
@@ -525,56 +653,82 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
case AMDGPU_INFO_HW_IP_INFO: {
struct drm_amdgpu_info_hw_ip ip = {};
- int ret;
ret = amdgpu_hw_ip_info(adev, info, &ip);
if (ret)
return ret;
- ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip)));
+ ret = copy_to_user(out, &ip, min_t(size_t, size, sizeof(ip)));
return ret ? -EFAULT : 0;
}
case AMDGPU_INFO_HW_IP_COUNT: {
- enum amd_ip_block_type type;
- uint32_t count = 0;
+ fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
+ type = amdgpu_ip_get_block_type(adev, info->query_hw_ip.type);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, type);
- switch (info->query_hw_ip.type) {
- case AMDGPU_HW_IP_GFX:
- type = AMD_IP_BLOCK_TYPE_GFX;
- break;
- case AMDGPU_HW_IP_COMPUTE:
- type = AMD_IP_BLOCK_TYPE_GFX;
- break;
- case AMDGPU_HW_IP_DMA:
- type = AMD_IP_BLOCK_TYPE_SDMA;
- break;
- case AMDGPU_HW_IP_UVD:
- type = AMD_IP_BLOCK_TYPE_UVD;
+ if (!ip_block || !ip_block->status.valid)
+ return -EINVAL;
+
+ if (adev->xcp_mgr && adev->xcp_mgr->num_xcps > 0 &&
+ fpriv->xcp_id < adev->xcp_mgr->num_xcps) {
+ xcp = &adev->xcp_mgr->xcp[fpriv->xcp_id];
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_SDMA, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask) * adev->jpeg.num_jpeg_rings;
+ break;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
+ }
+
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ case AMD_IP_BLOCK_TYPE_VCE:
+ count = 1;
break;
- case AMDGPU_HW_IP_VCE:
- type = AMD_IP_BLOCK_TYPE_VCE;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ count = adev->sdma.num_instances;
break;
- case AMDGPU_HW_IP_UVD_ENC:
- type = AMD_IP_BLOCK_TYPE_UVD;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ count = adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings;
break;
- case AMDGPU_HW_IP_VCN_DEC:
- case AMDGPU_HW_IP_VCN_ENC:
- type = AMD_IP_BLOCK_TYPE_VCN;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ count = adev->vcn.num_vcn_inst;
break;
- case AMDGPU_HW_IP_VCN_JPEG:
- type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
- AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ count = adev->uvd.num_uvd_inst;
break;
+ /* For all other IP block types not listed in the switch statement
+ * the ip status is valid here and the instance count is one.
+ */
default:
- return -EINVAL;
+ count = 1;
+ break;
}
- for (i = 0; i < adev->num_ip_blocks; i++)
- if (adev->ip_blocks[i].version->type == type &&
- adev->ip_blocks[i].status.valid &&
- count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
- count++;
-
return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
}
case AMDGPU_INFO_TIMESTAMP:
@@ -582,7 +736,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_FW_VERSION: {
struct drm_amdgpu_info_firmware fw_info;
- int ret;
/* We only support one instance of each IP block right now. */
if (info->query_fw.ip_instance != 0)
@@ -605,13 +758,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VRAM_USAGE:
- ui64 = amdgpu_vram_mgr_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM));
+ ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0;
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VIS_VRAM_USAGE:
- ui64 = amdgpu_vram_mgr_vis_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM));
+ ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GTT_USAGE:
- ui64 = amdgpu_gtt_mgr_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
+ ui64 = ttm_resource_manager_usage(&adev->mman.gtt_mgr.manager);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GDS_CONFIG: {
struct drm_amdgpu_info_gds gds_info;
@@ -635,24 +789,24 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
vram_gtt.vram_size);
vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
- vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
}
case AMDGPU_INFO_MEMORY: {
struct drm_amdgpu_memory_info mem;
- struct ttm_resource_manager *vram_man =
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
struct ttm_resource_manager *gtt_man =
- ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ &adev->mman.gtt_mgr.manager;
+ struct ttm_resource_manager *vram_man =
+ &adev->mman.vram_mgr.manager;
+
memset(&mem, 0, sizeof(mem));
mem.vram.total_heap_size = adev->gmc.real_vram_size;
mem.vram.usable_heap_size = adev->gmc.real_vram_size -
atomic64_read(&adev->vram_pin_size) -
AMDGPU_VM_RESERVED_VRAM;
- mem.vram.heap_usage =
- amdgpu_vram_mgr_usage(vram_man);
+ mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(vram_man) : 0;
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
@@ -662,16 +816,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
mem.vram.usable_heap_size);
mem.cpu_accessible_vram.heap_usage =
- amdgpu_vram_mgr_vis_usage(vram_man);
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
mem.cpu_accessible_vram.max_allocation =
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
mem.gtt.total_heap_size = gtt_man->size;
- mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
- mem.gtt.heap_usage =
- amdgpu_gtt_mgr_usage(gtt_man);
+ mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
return copy_to_user(out, &mem,
@@ -679,32 +831,47 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
? -EFAULT : 0;
}
case AMDGPU_INFO_READ_MMR_REG: {
- unsigned n, alloc_size;
+ int ret = 0;
+ unsigned int n, alloc_size;
uint32_t *regs;
- unsigned se_num = (info->read_mmr_reg.instance >>
+ unsigned int se_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SE_INDEX_MASK;
- unsigned sh_num = (info->read_mmr_reg.instance >>
+ unsigned int sh_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SH_INDEX_MASK;
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -ENOENT;
+
/* set full masks if the userspace set all bits
- * in the bitfields */
- if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK)
+ * in the bitfields
+ */
+ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) {
se_num = 0xffffffff;
- else if (se_num >= AMDGPU_GFX_MAX_SE)
- return -EINVAL;
- if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
+ } else if (se_num >= AMDGPU_GFX_MAX_SE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) {
sh_num = 0xffffffff;
- else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE)
- return -EINVAL;
+ } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (info->read_mmr_reg.count > 128)
- return -EINVAL;
+ if (info->read_mmr_reg.count > 128) {
+ ret = -EINVAL;
+ goto out;
+ }
regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
- if (!regs)
- return -ENOMEM;
+ if (!regs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
amdgpu_gfx_off_ctrl(adev, false);
@@ -716,18 +883,22 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
info->read_mmr_reg.dword_offset + i);
kfree(regs);
amdgpu_gfx_off_ctrl(adev, true);
- return -EFAULT;
+ ret = -EFAULT;
+ goto out;
}
}
amdgpu_gfx_off_ctrl(adev, true);
n = copy_to_user(out, regs, min(size, alloc_size));
kfree(regs);
- return n ? -EFAULT : 0;
+ ret = (n ? -EFAULT : 0);
+out:
+ up_read(&adev->reset_domain->sem);
+ return ret;
}
case AMDGPU_INFO_DEV_INFO: {
struct drm_amdgpu_info_device *dev_info;
uint64_t vm_size;
- int ret;
+ uint32_t pcie_gen_mask, pcie_width_mask;
dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
if (!dev_info)
@@ -745,32 +916,52 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (adev->pm.dpm_enabled) {
dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
+ dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 10;
+ dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 10;
} else {
- dev_info->max_engine_clock = adev->clock.default_sclk * 10;
- dev_info->max_memory_clock = adev->clock.default_mclk * 10;
+ dev_info->max_engine_clock =
+ dev_info->min_engine_clock =
+ adev->clock.default_sclk * 10;
+ dev_info->max_memory_clock =
+ dev_info->min_memory_clock =
+ adev->clock.default_mclk * 10;
}
dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se *
adev->gfx.config.max_shader_engines;
dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
- dev_info->_pad = 0;
dev_info->ids_flags = 0;
if (adev->flags & AMD_IS_APU)
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
+ if (adev->gfx.mcbp)
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
if (amdgpu_is_tmz(adev))
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
+ if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
+
+ /* Gang submit is not supported under SRIOV currently */
+ if (!amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT;
+
+ if (amdgpu_passthrough(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+ else if (amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_VF <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
- vm_size -= AMDGPU_VA_RESERVED_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_TOP;
/* Older VCE FW versions are buggy and can handle only 40bits */
if (adev->vce.fw_version &&
adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
vm_size = min(vm_size, 1ULL << 40);
- dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
+ dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM;
dev_info->virtual_address_max =
min(vm_size, AMDGPU_GMC_HOLE_START);
@@ -778,16 +969,16 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
dev_info->high_va_offset = AMDGPU_GMC_HOLE_END;
dev_info->high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
}
- dev_info->virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
+ dev_info->virtual_address_alignment = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
dev_info->pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
- dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
+ dev_info->gart_page_size = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
dev_info->cu_active_number = adev->gfx.cu_info.number;
dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
dev_info->ce_ram_size = adev->gfx.ce_ram_size;
memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
sizeof(adev->gfx.cu_info.ao_cu_bitmap));
memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
- sizeof(adev->gfx.cu_info.bitmap));
+ sizeof(dev_info->cu_bitmap));
dev_info->vram_type = adev->gmc.vram_type;
dev_info->vram_bit_width = adev->gmc.vram_width;
dev_info->vce_harvest_config = adev->vce.harvest_config;
@@ -807,13 +998,51 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
+ /* Combine the chip gen mask with the platform (CPU/mobo) mask. */
+ pcie_gen_mask = adev->pm.pcie_gen_mask &
+ (adev->pm.pcie_gen_mask >> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT);
+ pcie_width_mask = adev->pm.pcie_mlw_mask &
+ (adev->pm.pcie_mlw_mask >> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT);
+ dev_info->pcie_gen = fls(pcie_gen_mask);
+ dev_info->pcie_num_lanes =
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
+
+ dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size;
+ dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp;
+ dev_info->sqc_data_cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ dev_info->sqc_inst_cache_size = adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ dev_info->gl1c_cache_size = adev->gfx.config.gc_gl1c_size_per_instance *
+ adev->gfx.config.gc_gl1c_per_sa;
+ dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+ dev_info->mall_size = adev->gmc.mall_size;
+
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow_info;
+
+ ret = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info);
+ if (!ret) {
+ dev_info->shadow_size = shadow_info.shadow_size;
+ dev_info->shadow_alignment = shadow_info.shadow_alignment;
+ dev_info->csa_size = shadow_info.csa_size;
+ dev_info->csa_alignment = shadow_info.csa_alignment;
+ }
+ }
+
+ dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+
ret = copy_to_user(out, dev_info,
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
kfree(dev_info);
return ret;
}
case AMDGPU_INFO_VCE_CLOCK_TABLE: {
- unsigned i;
+ unsigned int i;
struct drm_amdgpu_info_vce_clock_table vce_clk_table = {};
struct amd_vce_state *vce_state;
@@ -850,6 +1079,26 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
min((size_t)size, (size_t)(bios_size - bios_offset)))
? -EFAULT : 0;
}
+ case AMDGPU_INFO_VBIOS_INFO: {
+ struct drm_amdgpu_info_vbios vbios_info = {};
+ struct atom_context *atom_context;
+
+ atom_context = adev->mode_info.atom_context;
+ if (atom_context) {
+ memcpy(vbios_info.name, atom_context->name,
+ sizeof(atom_context->name));
+ memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
+ sizeof(atom_context->vbios_pn));
+ vbios_info.version = atom_context->version;
+ memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
+ sizeof(atom_context->vbios_ver_str));
+ memcpy(vbios_info.date, atom_context->date,
+ sizeof(atom_context->date));
+ }
+
+ return copy_to_user(out, &vbios_info,
+ min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n",
info->vbios_info.type);
@@ -919,7 +1168,21 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
case AMDGPU_INFO_SENSOR_GPU_AVG_POWER:
/* get average GPU power */
if (amdgpu_dpm_read_sensor(adev,
- AMDGPU_PP_SENSOR_GPU_POWER,
+ AMDGPU_PP_SENSOR_GPU_AVG_POWER,
+ (void *)&ui32, &ui32_size)) {
+ /* fall back to input power for backwards compat */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ }
+ ui32 >>= 8;
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_INPUT_POWER:
+ /* get input GPU power */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
@@ -959,6 +1222,24 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
ui32 /= 100;
break;
+ case AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK:
+ /* get peak pstate sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK:
+ /* get peak pstate mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
default:
DRM_DEBUG_KMS("Invalid request %d\n",
info->sensor_info.type);
@@ -975,12 +1256,117 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!ras)
return -EINVAL;
- ras_mask = (uint64_t)ras->supported << 32 | ras->features;
+ ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features;
return copy_to_user(out, &ras_mask,
min_t(u64, size, sizeof(ras_mask))) ?
-EFAULT : 0;
}
+ case AMDGPU_INFO_VIDEO_CAPS: {
+ const struct amdgpu_video_codecs *codecs;
+ struct drm_amdgpu_info_video_caps *caps;
+ int r;
+
+ if (!adev->asic_funcs->query_video_codecs)
+ return -EINVAL;
+
+ switch (info->video_cap.type) {
+ case AMDGPU_INFO_VIDEO_CAPS_DECODE:
+ r = amdgpu_asic_query_video_codecs(adev, false, &codecs);
+ if (r)
+ return -EINVAL;
+ break;
+ case AMDGPU_INFO_VIDEO_CAPS_ENCODE:
+ r = amdgpu_asic_query_video_codecs(adev, true, &codecs);
+ if (r)
+ return -EINVAL;
+ break;
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n",
+ info->video_cap.type);
+ return -EINVAL;
+ }
+
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ if (!caps)
+ return -ENOMEM;
+
+ for (i = 0; i < codecs->codec_count; i++) {
+ int idx = codecs->codec_array[i].codec_type;
+
+ switch (idx) {
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1:
+ caps->codec_info[idx].valid = 1;
+ caps->codec_info[idx].max_width =
+ codecs->codec_array[i].max_width;
+ caps->codec_info[idx].max_height =
+ codecs->codec_array[i].max_height;
+ caps->codec_info[idx].max_pixels_per_frame =
+ codecs->codec_array[i].max_pixels_per_frame;
+ caps->codec_info[idx].max_level =
+ codecs->codec_array[i].max_level;
+ break;
+ default:
+ break;
+ }
+ }
+ r = copy_to_user(out, caps,
+ min((size_t)size, sizeof(*caps))) ? -EFAULT : 0;
+ kfree(caps);
+ return r;
+ }
+ case AMDGPU_INFO_MAX_IBS: {
+ uint32_t max_ibs[AMDGPU_HW_IP_NUM];
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ max_ibs[i] = amdgpu_ring_max_ibs(i);
+
+ return copy_to_user(out, max_ibs,
+ min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_GPUVM_FAULT: {
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
+ unsigned long flags;
+
+ if (!vm)
+ return -EINVAL;
+
+ memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ gpuvm_fault.addr = vm->fault_info.addr;
+ gpuvm_fault.status = vm->fault_info.status;
+ gpuvm_fault.vmhub = vm->fault_info.vmhub;
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+ return copy_to_user(out, &gpuvm_fault,
+ min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_UQ_FW_AREAS: {
+ struct drm_amdgpu_info_uq_metadata meta_info = {};
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
@@ -988,23 +1374,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return 0;
}
-
-/*
- * Outdated mess for old drm with Xorg being in charge (void function now).
- */
-/**
- * amdgpu_driver_lastclose_kms - drm callback for last close
- *
- * @dev: drm dev pointer
- *
- * Switch vga_switcheroo state after last close (all asics).
- */
-void amdgpu_driver_lastclose_kms(struct drm_device *dev)
-{
- drm_fb_helper_lastclose(dev);
- vga_switcheroo_process_delayed_switch();
-}
-
/**
* amdgpu_driver_open_kms - drm callback for open
*
@@ -1046,7 +1415,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
dev_warn(adev->dev, "No more PASIDs available!");
pasid = 0;
}
- r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
+
+ r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
+ if (r)
+ goto error_pasid;
+
+ amdgpu_debugfs_vm_init(file_priv);
+
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
if (r)
goto error_pasid;
@@ -1056,7 +1432,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto error_vm;
}
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+ if (adev->gfx.mcbp) {
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
@@ -1065,10 +1441,22 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto error_vm;
}
+ r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va);
+ if (r)
+ goto error_vm;
+
mutex_init(&fpriv->bo_list_lock);
- idr_init(&fpriv->bo_list_handles);
+ idr_init_base(&fpriv->bo_list_handles, 1);
+
+ r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev);
+ if (r)
+ DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
- amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
+ r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
+ if (r)
+ goto error_vm;
+
+ amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
file_priv->driver_priv = fpriv;
goto out_suspend;
@@ -1083,7 +1471,6 @@ error_pasid:
kfree(fpriv);
out_suspend:
- pm_runtime_mark_last_busy(dev->dev);
pm_put:
pm_runtime_put_autosuspend(dev->dev);
@@ -1118,18 +1505,22 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv);
- amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
+ if (fpriv->csa_va) {
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
- /* TODO: how to handle reserve failure */
- BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
- amdgpu_vm_bo_rmv(adev, fpriv->csa_va);
+ WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
+ fpriv->csa_va, csa_addr));
fpriv->csa_va = NULL;
- amdgpu_bo_unreserve(adev->virt.csa_obj);
}
+ amdgpu_seq64_unmap(adev, fpriv);
+
pasid = fpriv->vm.pasid;
- pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
+ pd = amdgpu_bo_ref(fpriv->vm.root.bo);
+ if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
+ amdgpu_vm_bo_del(adev, fpriv->prt_va);
+ amdgpu_bo_unreserve(pd);
+ }
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_vm_fini(adev, &fpriv->vm);
@@ -1147,10 +1538,18 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
kfree(fpriv);
file_priv->driver_priv = NULL;
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
}
+
+void amdgpu_driver_release_kms(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ amdgpu_device_fini_sw(adev);
+ pci_set_drvdata(adev->pdev, NULL);
+}
+
/*
* VBlank related functions.
*/
@@ -1262,16 +1661,26 @@ void amdgpu_disable_vblank_kms(struct drm_crtc *crtc)
*/
#if defined(CONFIG_DEBUG_FS)
-static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
+static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
{
- struct drm_info_node *node = (struct drm_info_node *) m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = m->private;
struct drm_amdgpu_info_firmware fw_info;
struct drm_amdgpu_query_fw query_fw;
struct atom_context *ctx = adev->mode_info.atom_context;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
int ret, i;
+ static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = {
+#define TA_FW_NAME(type)[TA_FW_TYPE_PSP_##type] = #type
+ TA_FW_NAME(XGMI),
+ TA_FW_NAME(RAS),
+ TA_FW_NAME(HDCP),
+ TA_FW_NAME(DTM),
+ TA_FW_NAME(RAP),
+ TA_FW_NAME(SECUREDISPLAY),
+#undef TA_FW_NAME
+ };
+
/* VCE */
query_fw.fw_type = AMDGPU_INFO_FW_VCE;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
@@ -1352,6 +1761,22 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* RLCP */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLCV */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCV feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
/* MEC */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
query_fw.index = 0;
@@ -1371,6 +1796,15 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
fw_info.feature, fw_info.ver);
}
+ /* IMU */
+ query_fw.fw_type = AMDGPU_INFO_FW_IMU;
+ query_fw.index = 0;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "IMU feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
/* PSP SOS */
query_fw.fw_type = AMDGPU_INFO_FW_SOS;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
@@ -1389,31 +1823,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
fw_info.feature, fw_info.ver);
query_fw.fw_type = AMDGPU_INFO_FW_TA;
- for (i = 0; i < 4; i++) {
+ for (i = TA_FW_TYPE_PSP_XGMI; i < TA_FW_TYPE_MAX_INDEX; i++) {
query_fw.index = i;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
continue;
- switch (query_fw.index) {
- case 0:
- seq_printf(m, "TA %s feature version: 0x%08x, firmware version: 0x%08x\n",
- "RAS", fw_info.feature, fw_info.ver);
- break;
- case 1:
- seq_printf(m, "TA %s feature version: 0x%08x, firmware version: 0x%08x\n",
- "XGMI", fw_info.feature, fw_info.ver);
- break;
- case 2:
- seq_printf(m, "TA %s feature version: 0x%08x, firmware version: 0x%08x\n",
- "HDCP", fw_info.feature, fw_info.ver);
- break;
- case 3:
- seq_printf(m, "TA %s feature version: 0x%08x, firmware version: 0x%08x\n",
- "DTM", fw_info.feature, fw_info.ver);
- break;
- default:
- return -EINVAL;
- }
+
+ seq_printf(m, "TA %s feature version: 0x%08x, firmware version: 0x%08x\n",
+ ta_fw_name[i], fw_info.feature, fw_info.ver);
}
/* SMC */
@@ -1421,8 +1838,12 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
- seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x\n",
- fw_info.feature, fw_info.ver);
+ smu_program = (fw_info.ver >> 24) & 0xff;
+ smu_major = (fw_info.ver >> 16) & 0xff;
+ smu_minor = (fw_info.ver >> 8) & 0xff;
+ smu_debug = (fw_info.ver >> 0) & 0xff;
+ seq_printf(m, "SMC feature version: %u, program: %d, firmware version: 0x%08x (%d.%d.%d)\n",
+ fw_info.feature, smu_program, fw_info.ver, smu_major, smu_minor, smu_debug);
/* SDMA */
query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
@@ -1467,22 +1888,57 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "TOC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
- seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
+ /* CAP */
+ if (adev->psp.cap_fw) {
+ query_fw.fw_type = AMDGPU_INFO_FW_CAP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "CAP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+ }
+
+ /* MES_KIQ */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES_KIQ;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES_KIQ feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MES */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* VPE */
+ query_fw.fw_type = AMDGPU_INFO_FW_VPE;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VPE feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ seq_printf(m, "VBIOS version: %s\n", ctx->vbios_pn);
return 0;
}
-static const struct drm_info_list amdgpu_firmware_info_list[] = {
- {"amdgpu_firmware_info", amdgpu_debugfs_firmware_info, 0, NULL},
-};
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_firmware_info);
+
#endif
-int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
+void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
- return amdgpu_debugfs_add_files(adev, amdgpu_firmware_info_list,
- ARRAY_SIZE(amdgpu_firmware_info_list));
-#else
- return 0;
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_firmware_info", 0444, root,
+ adev, &amdgpu_debugfs_firmware_info_fops);
+
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
new file mode 100644
index 000000000000..4d1d4994ea3f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_lsdma.h"
+
+#define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL
+
+int amdgpu_lsdma_wait_for(struct amdgpu_device *adev,
+ uint32_t reg_index, uint32_t reg_val,
+ uint32_t mask)
+{
+ uint32_t val;
+ int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ udelay(1);
+ }
+
+ return -ETIME;
+}
+
+int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t mem_size)
+{
+ int ret;
+
+ if (mem_size == 0)
+ return -EINVAL;
+
+ while (mem_size > 0) {
+ uint64_t current_copy_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
+
+ ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, current_copy_size);
+ if (ret)
+ return ret;
+ src_addr += current_copy_size;
+ dst_addr += current_copy_size;
+ mem_size -= current_copy_size;
+ }
+
+ return 0;
+}
+
+int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t mem_size)
+{
+ int ret;
+
+ if (mem_size == 0)
+ return -EINVAL;
+
+ while (mem_size > 0) {
+ uint64_t current_fill_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
+
+ ret = adev->lsdma.funcs->fill_mem(adev, dst_addr, data, current_fill_size);
+ if (ret)
+ return ret;
+ dst_addr += current_fill_size;
+ mem_size -= current_fill_size;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
new file mode 100644
index 000000000000..c61ba58c5ee0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_LSDMA_H__
+#define __AMDGPU_LSDMA_H__
+
+struct amdgpu_lsdma {
+ const struct amdgpu_lsdma_funcs *funcs;
+};
+
+struct amdgpu_lsdma_funcs {
+ int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr,
+ uint64_t dst_addr, uint64_t size);
+ int (*fill_mem)(struct amdgpu_device *adev, uint64_t dst_addr,
+ uint32_t data, uint64_t size);
+ void (*update_memory_power_gating)(struct amdgpu_device *adev, bool enable);
+};
+
+int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr,
+ uint64_t dst_addr, uint64_t mem_size);
+int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev, uint64_t dst_addr,
+ uint32_t data, uint64_t mem_size);
+int amdgpu_lsdma_wait_for(struct amdgpu_device *adev, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
new file mode 100644
index 000000000000..3ca03b5e0f91
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -0,0 +1,630 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu_ras.h"
+#include "amdgpu.h"
+#include "amdgpu_mca.h"
+
+#include "umc/umc_6_7_0_offset.h"
+#include "umc/umc_6_7_0_sh_mask.h"
+
+static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev,
+ uint64_t mc_status)
+{
+ if (adev->umc.ras->check_ecc_err_status)
+ return adev->umc.ras->check_ecc_err_status(adev,
+ AMDGPU_MCA_ERROR_TYPE_DE, &mc_status);
+
+ return false;
+}
+
+void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count)
+{
+ uint64_t mc_status = RREG64_PCIE(mc_status_addr);
+
+ if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count)
+{
+ uint64_t mc_status = RREG64_PCIE(mc_status_addr);
+
+ if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr)
+{
+ WREG64_PCIE(mc_status_addr, 0x0ULL);
+}
+
+void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count));
+ amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count));
+
+ amdgpu_mca_reset_error_count(adev, mc_status_addr);
+}
+
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mp0.ras)
+ return 0;
+
+ ras = adev->mca.mp0.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mp1.ras)
+ return 0;
+
+ ras = adev->mca.mp1.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mpio.ras)
+ return 0;
+
+ ras = adev->mca.mpio.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mpio ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mpio");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mpio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+static void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set)
+{
+ if (!mca_set)
+ return;
+
+ memset(mca_set, 0, sizeof(*mca_set));
+ INIT_LIST_HEAD(&mca_set->list);
+}
+
+static int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry)
+{
+ struct mca_bank_node *node;
+
+ if (!entry)
+ return -EINVAL;
+
+ node = kvzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ memcpy(&node->entry, entry, sizeof(*entry));
+
+ INIT_LIST_HEAD(&node->node);
+ list_add_tail(&node->node, &mca_set->list);
+
+ mca_set->nr_entries++;
+
+ return 0;
+}
+
+static int amdgpu_mca_bank_set_merge(struct mca_bank_set *mca_set, struct mca_bank_set *new)
+{
+ struct mca_bank_node *node;
+
+ list_for_each_entry(node, &new->list, node)
+ amdgpu_mca_bank_set_add_entry(mca_set, &node->entry);
+
+ return 0;
+}
+
+static void amdgpu_mca_bank_set_remove_node(struct mca_bank_set *mca_set, struct mca_bank_node *node)
+{
+ if (!node)
+ return;
+
+ list_del(&node->node);
+ kvfree(node);
+
+ mca_set->nr_entries--;
+}
+
+static void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set)
+{
+ struct mca_bank_node *node, *tmp;
+
+ if (list_empty(&mca_set->list))
+ return;
+
+ list_for_each_entry_safe(node, tmp, &mca_set->list, node)
+ amdgpu_mca_bank_set_remove_node(mca_set, node);
+}
+
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+
+ mca->mca_funcs = mca_funcs;
+}
+
+int amdgpu_mca_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ struct mca_bank_cache *mca_cache;
+ int i;
+
+ atomic_set(&mca->ue_update_flag, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
+ mca_cache = &mca->mca_caches[i];
+ mutex_init(&mca_cache->lock);
+ amdgpu_mca_bank_set_init(&mca_cache->mca_set);
+ }
+
+ return 0;
+}
+
+void amdgpu_mca_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ struct mca_bank_cache *mca_cache;
+ int i;
+
+ atomic_set(&mca->ue_update_flag, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
+ mca_cache = &mca->mca_caches[i];
+ amdgpu_mca_bank_set_release(&mca_cache->mca_set);
+ mutex_destroy(&mca_cache->lock);
+ }
+}
+
+int amdgpu_mca_reset(struct amdgpu_device *adev)
+{
+ amdgpu_mca_fini(adev);
+
+ return amdgpu_mca_init(adev);
+}
+
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (mca_funcs && mca_funcs->mca_set_debug_mode)
+ return mca_funcs->mca_set_debug_mode(adev, enable);
+
+ return -EOPNOTSUPP;
+}
+
+static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry,
+ struct ras_query_context *qctx)
+{
+ u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
+
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_STATUS]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_ADDR]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_MISC0]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_IPID]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_SYND]);
+}
+
+static int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!count)
+ return -EINVAL;
+
+ if (mca_funcs && mca_funcs->mca_get_valid_mca_count)
+ return mca_funcs->mca_get_valid_mca_count(adev, type, count);
+
+ return -EOPNOTSUPP;
+}
+
+static int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ int count;
+
+ if (!mca_funcs || !mca_funcs->mca_get_mca_entry)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ count = mca_funcs->max_ue_count;
+ break;
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ count = mca_funcs->max_ce_count;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (idx >= count)
+ return -EINVAL;
+
+ return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
+}
+
+static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgpu_mca_error_type type)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ bool ret = true;
+
+ /*
+ * Because the UE Valid MCA count will only be cleared after reset,
+ * in order to avoid repeated counting of the error count,
+ * the aca bank is only updated once during the gpu recovery stage.
+ */
+ if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
+ if (amdgpu_ras_intr_triggered())
+ ret = atomic_cmpxchg(&mca->ue_update_flag, 0, 1) == 0;
+ else
+ atomic_set(&mca->ue_update_flag, 0);
+ }
+
+ return ret;
+}
+
+static bool amdgpu_mca_bank_should_dump(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry)
+{
+ bool ret;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ ret = amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]);
+ break;
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ default:
+ ret = true;
+ break;
+ }
+
+ return ret;
+}
+
+static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set,
+ struct ras_query_context *qctx)
+{
+ struct mca_bank_entry entry;
+ uint32_t count = 0, i;
+ int ret;
+
+ if (!mca_set)
+ return -EINVAL;
+
+ if (!amdgpu_mca_bank_should_update(adev, type))
+ return 0;
+
+ ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ memset(&entry, 0, sizeof(entry));
+ ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, &entry);
+ if (ret)
+ return ret;
+
+ amdgpu_mca_bank_set_add_entry(mca_set, &entry);
+
+ if (amdgpu_mca_bank_should_dump(adev, type, &entry))
+ amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx);
+ }
+
+ return 0;
+}
+
+static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!count || !entry)
+ return -EINVAL;
+
+ if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count)
+ return -EOPNOTSUPP;
+
+ return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count);
+}
+
+static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct mca_bank_set *mca_set, struct ras_err_data *err_data)
+{
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ struct mca_bank_node *node, *tmp;
+ struct mca_bank_entry *entry;
+ uint32_t count;
+ int ret;
+
+ if (!mca_set)
+ return -EINVAL;
+
+ if (!mca_set->nr_entries)
+ return 0;
+
+ list_for_each_entry_safe(node, tmp, &mca_set->list, node) {
+ entry = &node->entry;
+
+ count = 0;
+ ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
+
+ if (!count)
+ continue;
+
+ memset(&mcm_info, 0, sizeof(mcm_info));
+
+ mcm_info.socket_id = entry->info.socket_id;
+ mcm_info.die_id = entry->info.aid;
+
+ if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
+ amdgpu_ras_error_statistic_ue_count(err_data,
+ &mcm_info, (uint64_t)count);
+ } else {
+ if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]))
+ amdgpu_ras_error_statistic_de_count(err_data,
+ &mcm_info, (uint64_t)count);
+ else
+ amdgpu_ras_error_statistic_ce_count(err_data,
+ &mcm_info, (uint64_t)count);
+ }
+
+ amdgpu_mca_bank_set_remove_node(mca_set, node);
+ }
+
+ return 0;
+}
+
+static int amdgpu_mca_add_mca_set_to_cache(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *new)
+{
+ struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type];
+ int ret;
+
+ mutex_lock(&mca_cache->lock);
+ ret = amdgpu_mca_bank_set_merge(&mca_cache->mca_set, new);
+ mutex_unlock(&mca_cache->lock);
+
+ return ret;
+}
+
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx)
+{
+ struct mca_bank_set mca_set;
+ struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type];
+ int ret;
+
+ amdgpu_mca_bank_set_init(&mca_set);
+
+ ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, qctx);
+ if (ret)
+ goto out_mca_release;
+
+ ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_set, err_data);
+ if (ret)
+ goto out_mca_release;
+
+ /* add remain mca bank to mca cache */
+ if (mca_set.nr_entries) {
+ ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
+ if (ret)
+ goto out_mca_release;
+ }
+
+ /* dispatch mca set again if mca cache has valid data */
+ mutex_lock(&mca_cache->lock);
+ if (mca_cache->mca_set.nr_entries)
+ ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_cache->mca_set, err_data);
+ mutex_unlock(&mca_cache->lock);
+
+out_mca_release:
+ amdgpu_mca_bank_set_release(&mca_set);
+
+ return ret;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ int ret;
+
+ ret = amdgpu_ras_set_mca_debug_mode(adev, val ? true : false);
+ if (ret)
+ return ret;
+
+ dev_info(adev->dev, "amdgpu set smu mca debug mode %s success\n", val ? "on" : "off");
+
+ return 0;
+}
+
+static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry)
+{
+ int i, idx = entry->idx;
+ int reg_idx_array[] = {
+ MCA_REG_IDX_STATUS,
+ MCA_REG_IDX_ADDR,
+ MCA_REG_IDX_MISC0,
+ MCA_REG_IDX_IPID,
+ MCA_REG_IDX_SYND,
+ };
+
+ seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE");
+ seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip);
+ seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+ idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype);
+
+ for (i = 0; i < ARRAY_SIZE(reg_idx_array); i++)
+ seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg_idx_array[i], entry->regs[reg_idx_array[i]]);
+}
+
+static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct mca_bank_node *node;
+ struct mca_bank_set mca_set;
+ struct ras_query_context qctx;
+ int ret;
+
+ amdgpu_mca_bank_set_init(&mca_set);
+
+ qctx.evid.event_id = RAS_EVENT_INVALID_ID;
+ ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, &qctx);
+ if (ret)
+ goto err_free_mca_set;
+
+ seq_printf(m, "amdgpu smu %s valid mca count: %d\n",
+ type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", mca_set.nr_entries);
+
+ if (!mca_set.nr_entries)
+ goto err_free_mca_set;
+
+ list_for_each_entry(node, &mca_set.list, node)
+ mca_dump_entry(m, &node->entry);
+
+ /* add mca bank to mca bank cache */
+ ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
+
+err_free_mca_set:
+ amdgpu_mca_bank_set_release(&mca_set);
+
+ return ret;
+}
+
+static int mca_dump_ce_show(struct seq_file *m, void *unused)
+{
+ return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_CE);
+}
+
+static int mca_dump_ce_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mca_dump_ce_show, inode->i_private);
+}
+
+static const struct file_operations mca_ce_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = mca_dump_ce_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int mca_dump_ue_show(struct seq_file *m, void *unused)
+{
+ return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_UE);
+}
+
+static int mca_dump_ue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mca_dump_ue_show, inode->i_private);
+}
+
+static const struct file_operations mca_ue_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = mca_dump_ue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_set, "%llu\n");
+#endif
+
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{
+#if defined(CONFIG_DEBUG_FS)
+ if (!root)
+ return;
+
+ debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops);
+ debugfs_create_file("mca_ue_dump", 0400, root, adev, &mca_ue_dump_debug_fops);
+ debugfs_create_file("mca_ce_dump", 0400, root, adev, &mca_ce_dump_debug_fops);
+#endif
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
new file mode 100644
index 000000000000..e80323ff90c1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_MCA_H__
+#define __AMDGPU_MCA_H__
+
+#include "amdgpu_ras.h"
+
+#define MCA_MAX_REGS_COUNT (16)
+
+#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
+#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63)
+#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62)
+#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61)
+#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60)
+#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59)
+#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58)
+#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57)
+#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56)
+#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55)
+#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53)
+#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46)
+#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45)
+#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44)
+#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43)
+#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40)
+#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32)
+#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24)
+#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16)
+#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0)
+
+#define MCA_REG__MISC0__ERRCNT(x) MCA_REG_FIELD(x, 43, 32)
+
+#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0)
+
+enum amdgpu_mca_ip {
+ AMDGPU_MCA_IP_UNKNOW = -1,
+ AMDGPU_MCA_IP_PSP = 0,
+ AMDGPU_MCA_IP_SDMA,
+ AMDGPU_MCA_IP_GC,
+ AMDGPU_MCA_IP_SMU,
+ AMDGPU_MCA_IP_MP5,
+ AMDGPU_MCA_IP_UMC,
+ AMDGPU_MCA_IP_PCS_XGMI,
+ AMDGPU_MCA_IP_COUNT,
+};
+
+enum amdgpu_mca_error_type {
+ AMDGPU_MCA_ERROR_TYPE_UE = 0,
+ AMDGPU_MCA_ERROR_TYPE_CE,
+ AMDGPU_MCA_ERROR_TYPE_DE,
+};
+
+struct amdgpu_mca_ras_block {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_mca_ras {
+ struct ras_common_if *ras_if;
+ struct amdgpu_mca_ras_block *ras;
+};
+
+struct mca_bank_set {
+ int nr_entries;
+ struct list_head list;
+};
+
+struct mca_bank_cache {
+ struct mca_bank_set mca_set;
+ struct mutex lock;
+};
+
+struct amdgpu_mca {
+ struct amdgpu_mca_ras mp0;
+ struct amdgpu_mca_ras mp1;
+ struct amdgpu_mca_ras mpio;
+ const struct amdgpu_mca_smu_funcs *mca_funcs;
+ struct mca_bank_cache mca_caches[AMDGPU_MCA_ERROR_TYPE_DE];
+ atomic_t ue_update_flag;
+};
+
+enum mca_reg_idx {
+ MCA_REG_IDX_STATUS = 1,
+ MCA_REG_IDX_ADDR = 2,
+ MCA_REG_IDX_MISC0 = 3,
+ MCA_REG_IDX_IPID = 5,
+ MCA_REG_IDX_SYND = 6,
+ MCA_REG_IDX_COUNT = 16,
+};
+
+struct mca_bank_info {
+ int socket_id;
+ int aid;
+ int hwid;
+ int mcatype;
+};
+
+struct mca_bank_entry {
+ int idx;
+ enum amdgpu_mca_error_type type;
+ enum amdgpu_mca_ip ip;
+ struct mca_bank_info info;
+ uint64_t regs[MCA_MAX_REGS_COUNT];
+};
+
+struct mca_bank_node {
+ struct mca_bank_entry entry;
+ struct list_head node;
+};
+
+struct amdgpu_mca_smu_funcs {
+ int max_ue_count;
+ int max_ce_count;
+ int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable);
+ int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry, uint32_t *count);
+ int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ uint32_t *count);
+ int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry);
+};
+
+void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count);
+
+void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count);
+
+void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr);
+
+void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ void *ras_error_status);
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
+
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs);
+int amdgpu_mca_init(struct amdgpu_device *adev);
+void amdgpu_mca_fini(struct amdgpu_device *adev);
+int amdgpu_mca_reset(struct amdgpu_device *adev);
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, uint32_t *total);
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
new file mode 100644
index 000000000000..9c182ce501af
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -0,0 +1,784 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_mes.h"
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "amdgpu_mes_ctx.h"
+
+#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define AMDGPU_ONE_DOORBELL_SIZE 8
+
+int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
+{
+ return roundup(AMDGPU_ONE_DOORBELL_SIZE *
+ AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ PAGE_SIZE);
+}
+
+static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
+{
+ int i;
+ struct amdgpu_mes *mes = &adev->mes;
+
+ /* Bitmap for dynamic allocation of kernel doorbells */
+ mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
+ if (!mes->doorbell_bitmap) {
+ dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n");
+ return -ENOMEM;
+ }
+
+ mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
+ for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
+ adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
+ set_bit(i, mes->doorbell_bitmap);
+ }
+
+ return 0;
+}
+
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_mes_log_enable)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
+ return r;
+ }
+
+ memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size);
+
+ return 0;
+
+}
+
+static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
+{
+ bitmap_free(adev->mes.doorbell_bitmap);
+}
+
+int amdgpu_mes_init(struct amdgpu_device *adev)
+{
+ int i, r, num_pipes;
+
+ adev->mes.adev = adev;
+
+ idr_init(&adev->mes.pasid_idr);
+ idr_init(&adev->mes.gang_id_idr);
+ idr_init(&adev->mes.queue_id_idr);
+ ida_init(&adev->mes.doorbell_ida);
+ spin_lock_init(&adev->mes.queue_id_lock);
+ mutex_init(&adev->mes.mutex_hidden);
+
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++)
+ spin_lock_init(&adev->mes.ring_lock[i]);
+
+ adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+ adev->mes.vmid_mask_mmhub = 0xFF00;
+ adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00;
+
+ num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
+ if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
+ dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_GFX_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(12, 0, 0))
+ /*
+ * GFX V12 has only one GFX pipe, but 8 queues in it.
+ * GFX pipe 0 queue 0 is being used by Kernel queue.
+ * Set GFX pipe 0 queue 1-7 for MES scheduling
+ * mask = 1111 1110b
+ */
+ adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE;
+ else
+ /*
+ * GFX pipe 0 queue 0 is being used by Kernel queue.
+ * Set GFX pipe 0 queue 1 for MES scheduling
+ * mask = 10b
+ */
+ adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2;
+ }
+
+ num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec;
+ if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES)
+ dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC;
+ }
+
+ num_pipes = adev->sdma.num_instances;
+ if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES)
+ dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_SDMA_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
+ }
+
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) ring trail_fence_offs wb alloc failed\n",
+ r);
+ goto error;
+ }
+ adev->mes.sch_ctx_gpu_addr[i] =
+ adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4);
+ adev->mes.sch_ctx_ptr[i] =
+ (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]];
+
+ r = amdgpu_device_wb_get(adev,
+ &adev->mes.query_status_fence_offs[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) query_status_fence_offs wb alloc failed\n",
+ r);
+ goto error;
+ }
+ adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr +
+ (adev->mes.query_status_fence_offs[i] * 4);
+ adev->mes.query_status_fence_ptr[i] =
+ (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
+ }
+
+ r = amdgpu_mes_doorbell_init(adev);
+ if (r)
+ goto error;
+
+ r = amdgpu_mes_event_log_init(adev);
+ if (r)
+ goto error_doorbell;
+
+ if (adev->mes.hung_queue_db_array_size) {
+ r = amdgpu_bo_create_kernel(adev,
+ adev->mes.hung_queue_db_array_size * sizeof(u32),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.hung_queue_db_array_gpu_obj,
+ &adev->mes.hung_queue_db_array_gpu_addr,
+ &adev->mes.hung_queue_db_array_cpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
+ goto error_doorbell;
+ }
+ }
+
+ return 0;
+
+error_doorbell:
+ amdgpu_mes_doorbell_free(adev);
+error:
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ if (adev->mes.sch_ctx_ptr[i])
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+ if (adev->mes.query_status_fence_ptr[i])
+ amdgpu_device_wb_free(adev,
+ adev->mes.query_status_fence_offs[i]);
+ }
+
+ idr_destroy(&adev->mes.pasid_idr);
+ idr_destroy(&adev->mes.gang_id_idr);
+ idr_destroy(&adev->mes.queue_id_idr);
+ ida_destroy(&adev->mes.doorbell_ida);
+ mutex_destroy(&adev->mes.mutex_hidden);
+ return r;
+}
+
+void amdgpu_mes_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj,
+ &adev->mes.hung_queue_db_array_gpu_addr,
+ &adev->mes.hung_queue_db_array_cpu_addr);
+
+ amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ if (adev->mes.sch_ctx_ptr[i])
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+ if (adev->mes.query_status_fence_ptr[i])
+ amdgpu_device_wb_free(adev,
+ adev->mes.query_status_fence_offs[i]);
+ }
+
+ amdgpu_mes_doorbell_free(adev);
+
+ idr_destroy(&adev->mes.pasid_idr);
+ idr_destroy(&adev->mes.gang_id_idr);
+ idr_destroy(&adev->mes.queue_id_idr);
+ ida_destroy(&adev->mes.doorbell_ida);
+ mutex_destroy(&adev->mes.mutex_hidden);
+}
+
+int amdgpu_mes_suspend(struct amdgpu_device *adev)
+{
+ struct mes_suspend_gang_input input;
+ int r;
+
+ if (!amdgpu_mes_suspend_resume_all_supported(adev))
+ return 0;
+
+ memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
+ input.suspend_all_gangs = 1;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to suspend all gangs");
+
+ return r;
+}
+
+int amdgpu_mes_resume(struct amdgpu_device *adev)
+{
+ struct mes_resume_gang_input input;
+ int r;
+
+ if (!amdgpu_mes_suspend_resume_all_supported(adev))
+ return 0;
+
+ memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
+ input.resume_all_gangs = 1;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->resume_gang(&adev->mes, &input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to resume all gangs");
+
+ return r;
+}
+
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ struct mes_map_legacy_queue_input queue_input;
+ int r;
+
+ memset(&queue_input, 0, sizeof(queue_input));
+
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ queue_input.wptr_addr = ring->wptr_gpu_addr;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to map legacy queue\n");
+
+ return r;
+}
+
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct mes_unmap_legacy_queue_input queue_input;
+ int r;
+
+ queue_input.action = action;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.trail_fence_addr = gpu_addr;
+ queue_input.trail_fence_data = seq;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to unmap legacy queue\n");
+
+ return r;
+}
+
+int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int vmid,
+ bool use_mmio)
+{
+ struct mes_reset_queue_input queue_input;
+ int r;
+
+ memset(&queue_input, 0, sizeof(queue_input));
+
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.me_id = ring->me;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0;
+ queue_input.wptr_addr = ring->wptr_gpu_addr;
+ queue_input.vmid = vmid;
+ queue_input.use_mmio = use_mmio;
+ queue_input.is_kq = true;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ queue_input.legacy_gfx = true;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to reset legacy queue\n");
+
+ return r;
+}
+
+int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev)
+{
+ return adev->mes.hung_queue_db_array_size;
+}
+
+int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
+ int queue_type,
+ bool detect_only,
+ unsigned int *hung_db_num,
+ u32 *hung_db_array)
+
+{
+ struct mes_detect_and_reset_queue_input input;
+ u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr;
+ int r, i;
+
+ if (!hung_db_num || !hung_db_array)
+ return -EINVAL;
+
+ if ((queue_type != AMDGPU_RING_TYPE_GFX) &&
+ (queue_type != AMDGPU_RING_TYPE_COMPUTE) &&
+ (queue_type != AMDGPU_RING_TYPE_SDMA))
+ return -EINVAL;
+
+ /* Clear the doorbell array before detection */
+ memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET,
+ adev->mes.hung_queue_db_array_size * sizeof(u32));
+ input.queue_type = queue_type;
+ input.detect_only = detect_only;
+
+ r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
+ &input);
+ if (r) {
+ dev_err(adev->dev, "failed to detect and reset\n");
+ } else {
+ *hung_db_num = 0;
+ for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
+ if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
+ hung_db_array[i] = db_array[i];
+ *hung_db_num += 1;
+ }
+ }
+
+ /*
+ * TODO: return HQD info for MES scheduled user compute queue reset cases
+ * stored in hung_db_array hqd info offset to full array size
+ */
+ }
+
+ return r;
+}
+
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ struct mes_misc_op_input op_input;
+ int r, val = 0;
+ uint32_t addr_offset = 0;
+ uint64_t read_val_gpu_addr;
+ uint32_t *read_val_ptr;
+
+ if (amdgpu_device_wb_get(adev, &addr_offset)) {
+ dev_err(adev->dev, "critical bug! too many mes readers\n");
+ goto error;
+ }
+ read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
+ read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
+ op_input.op = MES_MISC_OP_READ_REG;
+ op_input.read_reg.reg_offset = reg;
+ op_input.read_reg.buffer_addr = read_val_gpu_addr;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes rreg is not supported!\n");
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to read reg (0x%x)\n", reg);
+ else
+ val = *(read_val_ptr);
+
+error:
+ if (addr_offset)
+ amdgpu_device_wb_free(adev, addr_offset);
+ return val;
+}
+
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRITE_REG;
+ op_input.write_reg.reg_offset = reg;
+ op_input.write_reg.reg_value = val;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes wreg is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to write reg (0x%x)\n", reg);
+
+error:
+ return r;
+}
+
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
+ op_input.wrm_reg.reg0 = reg0;
+ op_input.wrm_reg.reg1 = reg1;
+ op_input.wrm_reg.ref = ref;
+ op_input.wrm_reg.mask = mask;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to reg_write_reg_wait\n");
+
+error:
+ return r;
+}
+
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
+{
+ uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
+
+ hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
+ hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
+ ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
+
+ return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
+ ref_and_mask, ref_and_mask);
+}
+
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev,
+ "mes set shader debugger is not supported!\n");
+ return -EINVAL;
+ }
+
+ op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+ op_input.set_shader_debugger.process_context_addr = process_context_addr;
+ op_input.set_shader_debugger.flags.u32all = flags;
+
+ /* use amdgpu mes_flush_shader_debugger instead */
+ if (op_input.set_shader_debugger.flags.process_ctx_flush)
+ return -EINVAL;
+
+ op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
+ memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
+ sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
+
+ if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT) >= 14)
+ op_input.set_shader_debugger.trap_en = trap_en;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ dev_err(adev->dev, "failed to set_shader_debugger\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+
+ return r;
+}
+
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev,
+ "mes flush shader debugger is not supported!\n");
+ return -EINVAL;
+ }
+
+ op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+ op_input.set_shader_debugger.process_context_addr = process_context_addr;
+ op_input.set_shader_debugger.flags.process_ctx_flush = true;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ dev_err(adev->dev, "failed to set_shader_debugger\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+
+ return r;
+}
+
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio)
+{
+ return adev->mes.aggregated_doorbells[prio];
+}
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
+{
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ struct amdgpu_firmware_info *info;
+ char ucode_prefix[30];
+ char fw_name[50];
+ bool need_retry = false;
+ u32 *ucode_ptr;
+ int r;
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
+ sizeof(ucode_prefix));
+ if (adev->enable_uni_mes) {
+ snprintf(fw_name, sizeof(fw_name),
+ "amdgpu/%s_uni_mes.bin", ucode_prefix);
+ } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+ ucode_prefix,
+ pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
+ need_retry = true;
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+ ucode_prefix,
+ pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
+ }
+
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
+ if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
+ dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix);
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mes.bin", ucode_prefix);
+ }
+
+ if (r)
+ goto out;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+ adev->mes.uc_start_addr[pipe] =
+ le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
+ adev->mes.data_start_addr[pipe] =
+ le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
+ ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data +
+ sizeof(union amdgpu_firmware_header));
+ adev->mes.fw_version[pipe] =
+ le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ int ucode, ucode_data;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ ucode = AMDGPU_UCODE_ID_CP_MES;
+ ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
+ } else {
+ ucode = AMDGPU_UCODE_ID_CP_MES1;
+ ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
+ }
+
+ info = &adev->firmware.ucode[ucode];
+ info->ucode_id = ucode;
+ info->fw = adev->mes.fw[pipe];
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
+ PAGE_SIZE);
+
+ info = &adev->firmware.ucode[ucode_data];
+ info->ucode_id = ucode_data;
+ info->fw = adev->mes.fw[pipe];
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
+ PAGE_SIZE);
+ }
+
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+ return r;
+}
+
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
+{
+ uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+
+ return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
+ mes_rev >= 0x63) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0));
+}
+
+/* Fix me -- node_id is used to identify the correct MES instances in the future */
+static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
+ uint32_t node_id, bool enable)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ op_input.op = MES_MISC_OP_CHANGE_CONFIG;
+ op_input.change_config.option.limit_single_process = enable ? 1 : 0;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes change config is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to change_config.\n");
+
+error:
+ return r;
+}
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ r |= amdgpu_mes_set_enforce_isolation(adev, i, true);
+ else
+ r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ }
+ return r;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+ seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+ mem, adev->mes.event_log_size, false);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ if (adev->enable_mes && amdgpu_mes_log_enable)
+ debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+ adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 7334982ea702..e989225b354b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -24,10 +24,25 @@
#ifndef __AMDGPU_MES_H__
#define __AMDGPU_MES_H__
+#include "amdgpu_irq.h"
+#include "kgd_kfd_interface.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_doorbell.h"
+#include <linux/sched/mm.h>
+
#define AMDGPU_MES_MAX_COMPUTE_PIPES 8
#define AMDGPU_MES_MAX_GFX_PIPES 2
#define AMDGPU_MES_MAX_SDMA_PIPES 2
+#define AMDGPU_MES_API_VERSION_SHIFT 12
+#define AMDGPU_MES_FEAT_VERSION_SHIFT 24
+
+#define AMDGPU_MES_VERSION_MASK 0x00000fff
+#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
+#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
+#define AMDGPU_MES_MSCRATCH_SIZE 0x40000
+#define AMDGPU_MES_INVALID_DB_OFFSET 0xffffffff
+
enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
@@ -37,57 +52,159 @@ enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_NUM_LEVELS
};
+#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+
struct amdgpu_mes_funcs;
+enum amdgpu_mes_pipe {
+ AMDGPU_MES_SCHED_PIPE = 0,
+ AMDGPU_MES_KIQ_PIPE,
+ AMDGPU_MAX_MES_PIPES = 2,
+};
+
struct amdgpu_mes {
struct amdgpu_device *adev;
+ struct mutex mutex_hidden;
+
+ struct idr pasid_idr;
+ struct idr gang_id_idr;
+ struct idr queue_id_idr;
+ struct ida doorbell_ida;
+
+ spinlock_t queue_id_lock;
+
+ uint32_t sched_version;
+ uint32_t kiq_version;
+ uint32_t fw_version[AMDGPU_MAX_MES_PIPES];
+ bool enable_legacy_queue_map;
+
uint32_t total_max_queue;
- uint32_t doorbell_id_offset;
uint32_t max_doorbell_slices;
uint64_t default_process_quantum;
uint64_t default_gang_quantum;
- struct amdgpu_ring ring;
+ struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES];
+ spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES];
- const struct firmware *fw;
+ const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
/* mes ucode */
- struct amdgpu_bo *ucode_fw_obj;
- uint64_t ucode_fw_gpu_addr;
- uint32_t *ucode_fw_ptr;
- uint32_t ucode_fw_version;
- uint64_t uc_start_addr;
+ struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES];
/* mes ucode data */
- struct amdgpu_bo *data_fw_obj;
- uint64_t data_fw_gpu_addr;
- uint32_t *data_fw_ptr;
- uint32_t data_fw_version;
- uint64_t data_start_addr;
+ struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES];
/* eop gpu obj */
- struct amdgpu_bo *eop_gpu_obj;
- uint64_t eop_gpu_addr;
+ struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES];
- void *mqd_backup;
+ void *mqd_backup[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES];
uint32_t vmid_mask_gfxhub;
uint32_t vmid_mask_mmhub;
- uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
+ uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
- uint32_t sch_ctx_offs;
- uint64_t sch_ctx_gpu_addr;
- uint64_t *sch_ctx_ptr;
- uint32_t query_status_fence_offs;
- uint64_t query_status_fence_gpu_addr;
- uint64_t *query_status_fence_ptr;
+ uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
+ uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
+ uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
+
+ uint32_t saved_flags;
+
+ /* initialize kiq pipe */
+ int (*kiq_hw_init)(struct amdgpu_device *adev);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev);
+
+ /* MES doorbells */
+ uint32_t db_start_dw_offset;
+ uint32_t num_mes_dbs;
+ unsigned long *doorbell_bitmap;
+
+ /* MES event log buffer */
+ uint32_t event_log_size;
+ struct amdgpu_bo *event_log_gpu_obj;
+ uint64_t event_log_gpu_addr;
+ void *event_log_cpu_addr;
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
+
+ /* mes resource_1 bo*/
+ struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES];
+ uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ void *resource_1_addr[AMDGPU_MAX_MES_PIPES];
+
+ int hung_queue_db_array_size;
+ int hung_queue_hqd_info_offset;
+ struct amdgpu_bo *hung_queue_db_array_gpu_obj;
+ uint64_t hung_queue_db_array_gpu_addr;
+ void *hung_queue_db_array_cpu_addr;
+};
+
+struct amdgpu_mes_gang {
+ int gang_id;
+ int priority;
+ int inprocess_gang_priority;
+ int global_priority_level;
+ struct list_head list;
+ struct amdgpu_mes_process *process;
+ struct amdgpu_bo *gang_ctx_bo;
+ uint64_t gang_ctx_gpu_addr;
+ void *gang_ctx_cpu_ptr;
+ uint64_t gang_quantum;
+ struct list_head queue_list;
+};
+
+struct amdgpu_mes_queue {
+ struct list_head list;
+ struct amdgpu_mes_gang *gang;
+ int queue_id;
+ uint64_t doorbell_off;
+ struct amdgpu_bo *mqd_obj;
+ void *mqd_cpu_ptr;
+ uint64_t mqd_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ int queue_type;
+ int paging;
+ struct amdgpu_ring *ring;
+};
+
+struct amdgpu_mes_queue_properties {
+ int queue_type;
+ uint64_t hqd_base_gpu_addr;
+ uint64_t rptr_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ uint64_t wptr_mc_addr;
+ uint32_t queue_size;
+ uint64_t eop_gpu_addr;
+ uint32_t hqd_pipe_priority;
+ uint32_t hqd_queue_priority;
+ bool paging;
+ struct amdgpu_ring *ring;
+ /* out */
+ uint64_t doorbell_off;
+};
+
+struct amdgpu_mes_gang_properties {
+ uint32_t priority;
+ uint32_t gang_quantum;
+ uint32_t inprocess_gang_priority;
+ uint32_t priority_level;
+ int global_priority_level;
};
struct mes_add_queue_input {
@@ -104,13 +221,44 @@ struct mes_add_queue_input {
uint32_t doorbell_offset;
uint64_t mqd_addr;
uint64_t wptr_addr;
+ uint64_t wptr_mc_addr;
uint32_t queue_type;
uint32_t paging;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+ uint32_t trap_en;
+ uint32_t skip_process_ctx_clear;
+ uint32_t is_kfd_process;
+ uint32_t is_aql_queue;
+ uint32_t queue_size;
+ uint32_t exclusively_scheduled;
};
struct mes_remove_queue_input {
uint32_t doorbell_offset;
uint64_t gang_context_addr;
+ bool remove_queue_after_reset;
+};
+
+struct mes_map_legacy_queue_input {
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+};
+
+struct mes_unmap_legacy_queue_input {
+ enum amdgpu_unmap_queues_action action;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t trail_fence_addr;
+ uint64_t trail_fence_data;
};
struct mes_suspend_gang_input {
@@ -125,6 +273,95 @@ struct mes_resume_gang_input {
uint64_t gang_context_addr;
};
+struct mes_reset_queue_input {
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ bool use_mmio;
+ uint32_t me_id;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+ uint32_t vmid;
+ bool legacy_gfx;
+ bool is_kq;
+};
+
+struct mes_detect_and_reset_queue_input {
+ uint32_t queue_type;
+ bool detect_only;
+};
+
+struct mes_inv_tlbs_pasid_input {
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
+enum mes_misc_opcode {
+ MES_MISC_OP_WRITE_REG,
+ MES_MISC_OP_READ_REG,
+ MES_MISC_OP_WRM_REG_WAIT,
+ MES_MISC_OP_WRM_REG_WR_WAIT,
+ MES_MISC_OP_SET_SHADER_DEBUGGER,
+ MES_MISC_OP_CHANGE_CONFIG,
+};
+
+struct mes_misc_op_input {
+ enum mes_misc_opcode op;
+
+ union {
+ struct {
+ uint32_t reg_offset;
+ uint64_t buffer_addr;
+ } read_reg;
+
+ struct {
+ uint32_t reg_offset;
+ uint32_t reg_value;
+ } write_reg;
+
+ struct {
+ uint32_t ref;
+ uint32_t mask;
+ uint32_t reg0;
+ uint32_t reg1;
+ } wrm_reg;
+
+ struct {
+ uint64_t process_context_addr;
+ union {
+ struct {
+ uint32_t single_memop : 1;
+ uint32_t single_alu_op : 1;
+ uint32_t reserved: 29;
+ uint32_t process_ctx_flush: 1;
+ };
+ uint32_t u32all;
+ } flags;
+ uint32_t spi_gdbg_per_vmid_cntl;
+ uint32_t tcp_watch_cntl[4];
+ uint32_t trap_en;
+ } set_shader_debugger;
+
+ struct {
+ union {
+ struct {
+ uint32_t limit_single_process : 1;
+ uint32_t enable_hws_logging_buffer : 1;
+ uint32_t reserved : 30;
+ };
+ uint32_t all;
+ } option;
+ struct {
+ uint32_t tdr_level;
+ uint32_t tdr_delay;
+ } tdr_config;
+ } change_config;
+ };
+};
+
struct amdgpu_mes_funcs {
int (*add_hw_queue)(struct amdgpu_mes *mes,
struct mes_add_queue_input *input);
@@ -132,11 +369,141 @@ struct amdgpu_mes_funcs {
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
+ int (*map_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_map_legacy_queue_input *input);
+
+ int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input);
+
int (*suspend_gang)(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input);
int (*resume_gang)(struct amdgpu_mes *mes,
struct mes_resume_gang_input *input);
+
+ int (*misc_op)(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input);
+
+ int (*reset_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input);
+
+ int (*detect_and_reset_hung_queues)(struct amdgpu_mes *mes,
+ struct mes_detect_and_reset_queue_input *input);
+
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
+#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
+int amdgpu_mes_init(struct amdgpu_device *adev);
+void amdgpu_mes_fini(struct amdgpu_device *adev);
+
+int amdgpu_mes_suspend(struct amdgpu_device *adev);
+int amdgpu_mes_resume(struct amdgpu_device *adev);
+
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int vmid,
+ bool use_mmio);
+
+int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev);
+int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
+ int queue_type,
+ bool detect_only,
+ unsigned int *hung_db_num,
+ u32 *hung_db_array);
+
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val);
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en);
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr);
+
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio);
+
+int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail about why to set no-FS reclaim with MES lock:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is, that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_nofs_save/restore calls guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_nofs_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems, we
+ * have no control over them and their lock dependencies.So the only workable
+ * solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+ mutex_lock(&mes->mutex_hidden);
+ mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+ memalloc_noreclaim_restore(mes->saved_flags);
+ mutex_unlock(&mes->mutex_hidden);
+}
+
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
+
#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
new file mode 100644
index 000000000000..912a5be2ece6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MES_CTX_H__
+#define __AMDGPU_MES_CTX_H__
+
+#include "v10_structs.h"
+
+enum {
+ AMDGPU_MES_CTX_RPTR_OFFS = 0,
+ AMDGPU_MES_CTX_WPTR_OFFS,
+ AMDGPU_MES_CTX_FENCE_OFFS,
+ AMDGPU_MES_CTX_COND_EXE_OFFS,
+ AMDGPU_MES_CTX_TRAIL_FENCE_OFFS,
+ AMDGPU_MES_CTX_MAX_OFFS,
+};
+
+enum {
+ AMDGPU_MES_CTX_RING_OFFS = AMDGPU_MES_CTX_MAX_OFFS,
+ AMDGPU_MES_CTX_IB_OFFS,
+ AMDGPU_MES_CTX_PADDING_OFFS,
+};
+
+#define AMDGPU_MES_CTX_MAX_GFX_RINGS 1
+#define AMDGPU_MES_CTX_MAX_COMPUTE_RINGS 4
+#define AMDGPU_MES_CTX_MAX_SDMA_RINGS 2
+#define AMDGPU_MES_CTX_MAX_RINGS \
+ (AMDGPU_MES_CTX_MAX_GFX_RINGS + \
+ AMDGPU_MES_CTX_MAX_COMPUTE_RINGS + \
+ AMDGPU_MES_CTX_MAX_SDMA_RINGS)
+
+#define AMDGPU_CSA_SDMA_SIZE 64
+#define GFX10_MEC_HPD_SIZE 2048
+
+struct amdgpu_wb_slot {
+ uint32_t data[8];
+};
+
+struct amdgpu_mes_ctx_meta_data {
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ /* gfx csa */
+ struct v10_gfx_meta_data gfx_meta_data;
+
+ uint8_t gds_backup[64 * 1024];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) gfx[AMDGPU_MES_CTX_MAX_GFX_RINGS];
+
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ uint8_t mec_hpd[GFX10_MEC_HPD_SIZE];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) compute[AMDGPU_MES_CTX_MAX_COMPUTE_RINGS];
+
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ /* sdma csa for mcbp */
+ uint8_t sdma_meta_data[AMDGPU_CSA_SDMA_SIZE];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) sdma[AMDGPU_MES_CTX_MAX_SDMA_RINGS];
+};
+
+struct amdgpu_mes_ctx_data {
+ struct amdgpu_bo *meta_data_obj;
+ uint64_t meta_data_gpu_addr;
+ uint64_t meta_data_mc_addr;
+ struct amdgpu_bo_va *meta_data_va;
+ void *meta_data_ptr;
+ uint32_t gang_ids[AMDGPU_HW_IP_DMA+1];
+};
+
+#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
+#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
+
+#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
+#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index ead3dc572ec5..0f6b1021fef3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2019 Advanced Micro Devices, Inc.
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -8,62 +8,39 @@
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-
#include "amdgpu.h"
#include "amdgpu_ras.h"
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev)
{
- int r;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "mmhub_err_count",
- };
+ int err;
+ struct amdgpu_mmhub_ras *ras;
- if (!adev->mmhub.ras_if) {
- adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->mmhub.ras_if)
- return -ENOMEM;
- adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB;
- adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->mmhub.ras_if->sub_block_index = 0;
- strcpy(adev->mmhub.ras_if->name, "mmhub");
- }
- ih_info.head = fs_info.head = *adev->mmhub.ras_if;
- r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if,
- &fs_info, &ih_info);
- if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) {
- kfree(adev->mmhub.ras_if);
- adev->mmhub.ras_if = NULL;
- }
+ if (!adev->mmhub.ras)
+ return 0;
- return r;
-}
+ ras = adev->mmhub.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mmhub ras block!\n");
+ return err;
+ }
-void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
- adev->mmhub.ras_if) {
- struct ras_common_if *ras_if = adev->mmhub.ras_if;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
+ strcpy(ras->ras_block.ras_comm.name, "mmhub");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mmhub.ras_if = &ras->ras_block.ras_comm;
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
+ /* mmhub ras follows amdgpu_ras_block_late_init_default for late init */
+ return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 1ae9bdae7311..1ca9d4ed8063 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -21,13 +21,36 @@
#ifndef __AMDGPU_MMHUB_H__
#define __AMDGPU_MMHUB_H__
+enum amdgpu_mmhub_ras_memory_id {
+ AMDGPU_MMHUB_WGMI_PAGEMEM = 0,
+ AMDGPU_MMHUB_RGMI_PAGEMEM = 1,
+ AMDGPU_MMHUB_WDRAM_PAGEMEM = 2,
+ AMDGPU_MMHUB_RDRAM_PAGEMEM = 3,
+ AMDGPU_MMHUB_WIO_CMDMEM = 4,
+ AMDGPU_MMHUB_RIO_CMDMEM = 5,
+ AMDGPU_MMHUB_WGMI_CMDMEM = 6,
+ AMDGPU_MMHUB_RGMI_CMDMEM = 7,
+ AMDGPU_MMHUB_WDRAM_CMDMEM = 8,
+ AMDGPU_MMHUB_RDRAM_CMDMEM = 9,
+ AMDGPU_MMHUB_MAM_DMEM0 = 10,
+ AMDGPU_MMHUB_MAM_DMEM1 = 11,
+ AMDGPU_MMHUB_MAM_DMEM2 = 12,
+ AMDGPU_MMHUB_MAM_DMEM3 = 13,
+ AMDGPU_MMHUB_WRET_TAGMEM = 19,
+ AMDGPU_MMHUB_RRET_TAGMEM = 20,
+ AMDGPU_MMHUB_WIO_DATAMEM = 21,
+ AMDGPU_MMHUB_WGMI_DATAMEM = 22,
+ AMDGPU_MMHUB_WDRAM_DATAMEM = 23,
+ AMDGPU_MMHUB_MEMORY_BLOCK_LAST,
+};
+
+struct amdgpu_mmhub_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
struct amdgpu_mmhub_funcs {
- void (*ras_init)(struct amdgpu_device *adev);
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
u64 (*get_fb_location)(struct amdgpu_device *adev);
+ u64 (*get_mc_fb_offset)(struct amdgpu_device *adev);
void (*init)(struct amdgpu_device *adev);
int (*gart_enable)(struct amdgpu_device *adev);
void (*set_fault_enable_default)(struct amdgpu_device *adev,
@@ -35,20 +58,20 @@ struct amdgpu_mmhub_funcs {
void (*gart_disable)(struct amdgpu_device *adev);
int (*set_clockgating)(struct amdgpu_device *adev,
enum amd_clockgating_state state);
- void (*get_clockgating)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags);
void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base);
void (*update_power_gating)(struct amdgpu_device *adev,
bool enable);
- void (*query_ras_error_status)(struct amdgpu_device *adev);
};
struct amdgpu_mmhub {
struct ras_common_if *ras_if;
const struct amdgpu_mmhub_funcs *funcs;
+ struct amdgpu_mmhub_ras *ras;
};
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
+int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
deleted file mode 100644
index 828b5167ff12..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- */
-
-/**
- * DOC: MMU Notifier
- *
- * For coherent userptr handling registers an MMU notifier to inform the driver
- * about updates on the page tables of a process.
- *
- * When somebody tries to invalidate the page tables we block the update until
- * all operations on the pages in question are completed, then those pages are
- * marked as accessed and also dirty if it wasn't a read only access.
- *
- * New command submissions using the userptrs in question are delayed until all
- * page table invalidation are completed and we once more see a coherent process
- * address space.
- */
-
-#include <linux/firmware.h>
-#include <linux/module.h>
-#include <drm/drm.h>
-
-#include "amdgpu.h"
-#include "amdgpu_amdkfd.h"
-
-/**
- * amdgpu_mn_invalidate_gfx - callback to notify about mm change
- *
- * @mni: the range (mm) is about to update
- * @range: details on the invalidation
- * @cur_seq: Value to pass to mmu_interval_set_seq()
- *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
- */
-static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
- const struct mmu_notifier_range *range,
- unsigned long cur_seq)
-{
- struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- long r;
-
- if (!mmu_notifier_range_blockable(range))
- return false;
-
- mutex_lock(&adev->notifier_lock);
-
- mmu_interval_set_seq(mni, cur_seq);
-
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
- mutex_unlock(&adev->notifier_lock);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- return true;
-}
-
-static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
- .invalidate = amdgpu_mn_invalidate_gfx,
-};
-
-/**
- * amdgpu_mn_invalidate_hsa - callback to notify about mm change
- *
- * @mni: the range (mm) is about to update
- * @range: details on the invalidation
- * @cur_seq: Value to pass to mmu_interval_set_seq()
- *
- * We temporarily evict the BO attached to this range. This necessitates
- * evicting all user-mode queues of the process.
- */
-static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
- const struct mmu_notifier_range *range,
- unsigned long cur_seq)
-{
- struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
- if (!mmu_notifier_range_blockable(range))
- return false;
-
- mutex_lock(&adev->notifier_lock);
-
- mmu_interval_set_seq(mni, cur_seq);
-
- amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
- mutex_unlock(&adev->notifier_lock);
-
- return true;
-}
-
-static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
- .invalidate = amdgpu_mn_invalidate_hsa,
-};
-
-/**
- * amdgpu_mn_register - register a BO for notifier updates
- *
- * @bo: amdgpu buffer object
- * @addr: userptr addr we should monitor
- *
- * Registers a mmu_notifier for the given BO at the specified address.
- * Returns 0 on success, -ERRNO if anything goes wrong.
- */
-int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
-{
- if (bo->kfd_bo)
- return mmu_interval_notifier_insert(&bo->notifier, current->mm,
- addr, amdgpu_bo_size(bo),
- &amdgpu_mn_hsa_ops);
- return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
- amdgpu_bo_size(bo),
- &amdgpu_mn_gfx_ops);
-}
-
-/**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
- *
- * @bo: amdgpu buffer object
- *
- * Remove any registration of mmu notifier updates from the buffer object.
- */
-void amdgpu_mn_unregister(struct amdgpu_bo *bo)
-{
- if (!bo->notifier.mm)
- return;
- mmu_interval_notifier_remove(&bo->notifier);
- bo->notifier.mm = NULL;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
deleted file mode 100644
index a292238f75eb..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Christian König
- */
-#ifndef __AMDGPU_MN_H__
-#define __AMDGPU_MN_H__
-
-#include <linux/types.h>
-#include <linux/hmm.h>
-#include <linux/rwsem.h>
-#include <linux/workqueue.h>
-#include <linux/interval_tree.h>
-
-#if defined(CONFIG_HMM_MIRROR)
-int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
-void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-#else
-static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
-{
- DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
- "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
- return -ENODEV;
-}
-static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
-#endif
-
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 319cb19e1b99..dc8d2f52c7d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -30,21 +30,18 @@
#ifndef AMDGPU_MODE_H
#define AMDGPU_MODE_H
+#include <drm/display/drm_dp_helper.h>
#include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
#include <drm/drm_encoder.h>
-#include <drm/drm_dp_helper.h>
#include <drm/drm_fixed.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_plane_helper.h>
+#include <drm/drm_framebuffer.h>
#include <drm/drm_probe_helper.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <linux/hrtimer.h>
#include "amdgpu_irq.h"
-#include <drm/drm_dp_mst_helper.h>
+#include <drm/display/drm_dp_mst_helper.h>
#include "modules/inc/mod_freesync.h"
#include "amdgpu_dm_irq_params.h"
@@ -53,6 +50,8 @@ struct amdgpu_device;
struct amdgpu_encoder;
struct amdgpu_router;
struct amdgpu_hpd;
+struct edid;
+struct drm_edid;
#define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
@@ -232,8 +231,6 @@ struct amdgpu_i2c_chan {
struct mutex mutex;
};
-struct amdgpu_fbdev;
-
struct amdgpu_afmt {
bool enabled;
int offset;
@@ -304,18 +301,12 @@ struct amdgpu_framebuffer {
uint64_t tiling_flags;
bool tmz_surface;
+ bool gfx12_dcc;
/* caching for later use */
uint64_t address;
};
-struct amdgpu_fbdev {
- struct drm_fb_helper helper;
- struct amdgpu_framebuffer rfb;
- struct list_head fbdev_list;
- struct amdgpu_device *adev;
-};
-
struct amdgpu_mode_info {
struct atom_context *atom_context;
struct card_info *atom_card_info;
@@ -338,13 +329,10 @@ struct amdgpu_mode_info {
/* Adaptive Backlight Modulation (power feature) */
struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */
- struct edid *bios_hardcoded_edid;
- int bios_hardcoded_edid_size;
+ const struct drm_edid *bios_hardcoded_edid;
- /* pointer to fbdev info structure */
- struct amdgpu_fbdev *rfbdev;
/* firmware flags */
- u16 firmware_flags;
+ u32 firmware_flags;
/* pointer to backlight encoder */
struct amdgpu_encoder *bl_encoder;
u8 bl_level; /* saved backlight level */
@@ -352,22 +340,110 @@ struct amdgpu_mode_info {
int num_crtc; /* number of crtcs */
int num_hpd; /* number of hpd pins */
int num_dig; /* number of dig blocks */
+ bool gpu_vm_support; /* supports display from GTT */
int disp_priority;
const struct amdgpu_display_funcs *funcs;
const enum drm_plane_type *plane_type;
+
+ /* Driver-private color mgmt props */
+
+ /* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+ * convert encoded values to light linear values before sampling or
+ * blending.
+ */
+ struct drm_property *plane_degamma_lut_property;
+ /* @plane_degamma_lut_size_property: Plane property to define the max
+ * size of degamma LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_degamma_lut_size_property;
+ /**
+ * @plane_degamma_tf_property: Plane pre-defined transfer function to
+ * to go from scanout/encoded values to linear values.
+ */
+ struct drm_property *plane_degamma_tf_property;
+ /**
+ * @plane_hdr_mult_property:
+ */
+ struct drm_property *plane_hdr_mult_property;
+
+ struct drm_property *plane_ctm_property;
+ /**
+ * @shaper_lut_property: Plane property to set pre-blending shaper LUT
+ * that converts color content before 3D LUT. If
+ * plane_shaper_tf_property != Identity TF, AMD color module will
+ * combine the user LUT values with pre-defined TF into the LUT
+ * parameters to be programmed.
+ */
+ struct drm_property *plane_shaper_lut_property;
+ /**
+ * @shaper_lut_size_property: Plane property for the size of
+ * pre-blending shaper LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_shaper_lut_size_property;
+ /**
+ * @plane_shaper_tf_property: Plane property to set a predefined
+ * transfer function for pre-blending shaper (before applying 3D LUT)
+ * with or without LUT. There is no shaper ROM, but we can use AMD
+ * color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *plane_shaper_tf_property;
+ /**
+ * @plane_lut3d_property: Plane property for color transformation using
+ * a 3D LUT (pre-blending), a three-dimensional array where each
+ * element is an RGB triplet. Each dimension has the size of
+ * lut3d_size. The array contains samples from the approximated
+ * function. On AMD, values between samples are estimated by
+ * tetrahedral interpolation. The array is accessed with three indices,
+ * one for each input dimension (color channel), blue being the
+ * outermost dimension, red the innermost.
+ */
+ struct drm_property *plane_lut3d_property;
+ /**
+ * @plane_degamma_lut_size_property: Plane property to define the max
+ * size of 3D LUT as supported by the driver (read-only). The max size
+ * is the max size of one dimension and, therefore, the max number of
+ * entries for 3D LUT array is the 3D LUT size cubed;
+ */
+ struct drm_property *plane_lut3d_size_property;
+ /**
+ * @plane_blend_lut_property: Plane property for output gamma before
+ * blending. Userspace set a blend LUT to convert colors after 3D LUT
+ * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they
+ * are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property
+ * != Identity TF, AMD color module will combine the user LUT values
+ * with pre-defined TF into the LUT parameters to be programmed.
+ */
+ struct drm_property *plane_blend_lut_property;
+ /**
+ * @plane_blend_lut_size_property: Plane property to define the max
+ * size of blend LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_blend_lut_size_property;
+ /**
+ * @plane_blend_tf_property: Plane property to set a predefined
+ * transfer function for pre-blending blend/out_gamma (after applying
+ * 3D LUT) with or without LUT. There is no blend ROM, but we can use
+ * AMD color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *plane_blend_tf_property;
+ /* @regamma_tf_property: Transfer function for CRTC regamma
+ * (post-blending). Possible values are defined by `enum
+ * amdgpu_transfer_function`. There is no regamma ROM, but we can use
+ * AMD color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *regamma_tf_property;
};
#define AMDGPU_MAX_BL_LEVEL 0xFF
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
struct amdgpu_backlight_privdata {
struct amdgpu_encoder *encoder;
uint8_t negative;
};
-#endif
-
struct amdgpu_atom_ss {
uint16_t percentage;
uint16_t percentage_divider;
@@ -422,8 +498,6 @@ struct amdgpu_crtc {
struct drm_connector *connector;
/* for dpm */
u32 line_time;
- u32 wm_low;
- u32 wm_high;
u32 lb_vblank_lead_lines;
struct drm_display_mode hw_mode;
/* for virtual dce */
@@ -432,6 +506,10 @@ struct amdgpu_crtc {
int otg_inst;
struct drm_pending_vblank_event *event;
+
+ bool wb_pending;
+ bool wb_enabled;
+ struct drm_writeback_connector *wb_conn;
};
struct amdgpu_encoder_atom_dig {
@@ -531,6 +609,7 @@ struct amdgpu_i2c_adapter {
struct i2c_adapter base;
struct ddc_service *ddc_service;
+ bool oem;
};
#define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux)
@@ -549,6 +628,7 @@ struct amdgpu_connector {
void *con_priv;
bool dac_load_detect;
bool detected_by_load; /* if the connection status was determined by load */
+ bool detected_hpd_without_ddc; /* if an HPD signal was detected on DVI, but ddc probing failed */
uint16_t connector_object_id;
struct amdgpu_hpd hpd;
struct amdgpu_router router;
@@ -564,8 +644,8 @@ struct amdgpu_mst_connector {
struct drm_dp_mst_topology_mgr mst_mgr;
struct amdgpu_dm_dp_aux dm_dp_aux;
- struct drm_dp_mst_port *port;
- struct amdgpu_connector *mst_port;
+ struct drm_dp_mst_port *mst_output_port;
+ struct amdgpu_connector *mst_root;
bool is_mst_connector;
struct amdgpu_encoder *mst_encoder;
};
@@ -602,11 +682,6 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
int *hpos, ktime_t *stime, ktime_t *etime,
const struct drm_display_mode *mode);
-int amdgpu_display_framebuffer_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj);
-
int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
void amdgpu_enc_destroy(struct drm_encoder *encoder);
@@ -623,15 +698,6 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
int *hpos, ktime_t *stime, ktime_t *etime,
const struct drm_display_mode *mode);
-/* fbdev layer */
-int amdgpu_fbdev_init(struct amdgpu_device *adev);
-void amdgpu_fbdev_fini(struct amdgpu_device *adev);
-void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state);
-int amdgpu_fbdev_total_size(struct amdgpu_device *adev);
-bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj);
-
-int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled);
-
/* amdgpu_display.c */
void amdgpu_display_print_display_setup(struct drm_device *dev);
int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 6201a5f4b4fa..a974265837f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -22,62 +22,65 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"
-int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev)
{
- int r;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "pcie_bif_err_count",
- };
+ int err;
+ struct amdgpu_nbio_ras *ras;
+
+ if (!adev->nbio.ras)
+ return 0;
- if (!adev->nbio.ras_if) {
- adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->nbio.ras_if)
- return -ENOMEM;
- adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF;
- adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->nbio.ras_if->sub_block_index = 0;
- strcpy(adev->nbio.ras_if->name, "pcie_bif");
+ ras = adev->nbio.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register pcie_bif ras block!\n");
+ return err;
}
- ih_info.head = fs_info.head = *adev->nbio.ras_if;
- r = amdgpu_ras_late_init(adev, adev->nbio.ras_if,
- &fs_info, &ih_info);
+
+ strcpy(ras->ras_block.ras_comm.name, "pcie_bif");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__PCIE_BIF;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->nbio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ if (adev->nbio.funcs && adev->nbio.funcs->get_pcie_replay_count)
+ return adev->nbio.funcs->get_pcie_replay_count(adev);
+
+ return 0;
+}
+
+bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev) || !adev->asic_funcs ||
+ !adev->asic_funcs->get_pcie_replay_count ||
+ (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count))
+ return false;
+
+ return true;
+}
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+ r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
- goto free;
+ return r;
- if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
if (r)
goto late_fini;
r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
if (r)
goto late_fini;
- } else {
- r = 0;
- goto free;
}
return 0;
late_fini:
- amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info);
-free:
- kfree(adev->nbio.ras_if);
- adev->nbio.ras_if = NULL;
+ amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
-
-void amdgpu_nbio_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
- adev->nbio.ras_if) {
- struct ras_common_if *ras_if = adev->nbio.ras_if;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
-
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 7c11bce4514b..b528de6a01f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -47,12 +47,21 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma7;
};
+struct amdgpu_nbio_ras {
+ struct amdgpu_ras_block_object ras_block;
+ void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
+ void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
+ int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
+ int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
+};
+
struct amdgpu_nbio_funcs {
const struct nbio_hdp_flush_reg *hdp_flush_reg;
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_hi_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev);
u32 (*get_rev_id)(struct amdgpu_device *adev);
@@ -60,8 +69,11 @@ struct amdgpu_nbio_funcs {
u32 (*get_memsize)(struct amdgpu_device *adev);
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vpe_doorbell_range)(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size);
void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index, int instance);
+ void (*gc_doorbell_init)(struct amdgpu_device *adev);
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
bool enable);
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
@@ -75,20 +87,23 @@ struct amdgpu_nbio_funcs {
void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
bool enable);
void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
void (*remap_hdp_registers)(struct amdgpu_device *adev);
- void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
- void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
- int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
- int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- int (*ras_late_init)(struct amdgpu_device *adev);
void (*enable_aspm)(struct amdgpu_device *adev,
bool enable);
void (*program_aspm)(struct amdgpu_device *adev);
+ void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
+ void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
+ void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
+ u32 (*get_rom_offset)(struct amdgpu_device *adev);
+ int (*get_compute_partition_mode)(struct amdgpu_device *adev);
+ u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
+ u32 *supp_modes);
+ bool (*is_nps_switch_requested)(struct amdgpu_device *adev);
+ u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
+ void (*set_reg_remap)(struct amdgpu_device *adev);
};
struct amdgpu_nbio {
@@ -97,8 +112,13 @@ struct amdgpu_nbio {
struct amdgpu_irq_src ras_err_event_athub_irq;
struct ras_common_if *ras_if;
const struct amdgpu_nbio_funcs *funcs;
+ struct amdgpu_nbio_ras *ras;
};
-int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_nbio_ras_fini(struct amdgpu_device *adev);
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev);
+
+bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 4b29b8205442..e08f58de4b17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -32,12 +32,17 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dma-buf.h>
+#include <linux/export.h>
+#include <drm/drm_drv.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_dma_buf.h"
/**
* DOC: amdgpu_object
@@ -52,50 +57,27 @@
*
*/
-/**
- * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
- *
- * @bo: &amdgpu_bo buffer object
- *
- * This function is called when a BO stops being pinned, and updates the
- * &amdgpu_device pin_size values accordingly.
- */
-static void amdgpu_bo_subtract_pin_size(struct amdgpu_bo *bo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
- if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
- atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size);
- atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo),
- &adev->visible_pin_size);
- } else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
- atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
- }
-}
-
static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
- if (bo->tbo.pin_count > 0)
- amdgpu_bo_subtract_pin_size(bo);
-
amdgpu_bo_kunmap(bo);
- if (bo->tbo.base.import_attach)
+ if (drm_gem_is_imported(&bo->tbo.base))
drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
drm_gem_object_release(&bo->tbo.base);
- /* in case amdgpu_device_recover_vram got NULL of bo->parent */
- if (!list_empty(&bo->shadow_list)) {
- mutex_lock(&adev->shadow_list_lock);
- list_del_init(&bo->shadow_list);
- mutex_unlock(&adev->shadow_list_lock);
- }
amdgpu_bo_unref(&bo->parent);
+ kvfree(bo);
+}
+
+static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ struct amdgpu_bo_user *ubo;
- kfree(bo->metadata);
- kfree(bo);
+ ubo = to_amdgpu_bo_user(bo);
+ kfree(ubo->metadata);
+ amdgpu_bo_destroy(tbo);
}
/**
@@ -110,8 +92,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
*/
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
- if (bo->destroy == &amdgpu_bo_destroy)
+ if (bo->destroy == &amdgpu_bo_destroy ||
+ bo->destroy == &amdgpu_bo_user_destroy)
return true;
+
return false;
}
@@ -132,28 +116,65 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
u32 c = 0;
if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
- unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
-
- places[c].fpfn = 0;
- places[c].lpfn = 0;
+ unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+
+ if (adev->gmc.mem_partitions && mem_id >= 0) {
+ places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn;
+ /*
+ * memory partition range lpfn is inclusive start + size - 1
+ * TTM place lpfn is exclusive start + size
+ */
+ places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1;
+ } else {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ }
places[c].mem_type = TTM_PL_VRAM;
places[c].flags = 0;
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- places[c].lpfn = visible_pfn;
+ places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn);
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
- if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ if (abo->tbo.type == ttm_bo_type_kernel &&
+ flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_DOORBELL;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_MMIO_REMAP;
+ places[c].flags = 0;
c++;
}
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
places[c].lpfn = 0;
- places[c].mem_type = TTM_PL_TT;
+ places[c].mem_type =
+ abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
+ AMDGPU_PL_PREEMPT : TTM_PL_TT;
places[c].flags = 0;
+ /*
+ * When GTT is just an alternative to VRAM make sure that we
+ * only use it as fallback and still try to fill up VRAM first.
+ */
+ if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) &&
+ domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+ places[c].flags |= TTM_PL_FLAG_FALLBACK;
c++;
}
@@ -197,13 +218,10 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
- BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS);
+ BUG_ON(c > AMDGPU_BO_MAX_PLACEMENTS);
placement->num_placement = c;
placement->placement = places;
-
- placement->num_busy_placement = c;
- placement->busy_placement = places;
}
/**
@@ -248,6 +266,7 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
if (!*bo_ptr) {
r = amdgpu_bo_create(adev, &bp, bo_ptr);
@@ -315,6 +334,9 @@ error_free:
*
* Allocates and pins a BO for kernel internal use.
*
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to create and access the kernel BO.
+ *
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
* Returns:
@@ -340,22 +362,89 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
}
/**
+ * amdgpu_bo_create_isp_user - create user BO for isp
+ *
+ * @adev: amdgpu device object
+ * @dma_buf: DMABUF handle for isp buffer
+ * @domain: where to place it
+ * @bo: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ *
+ * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does
+ * GART alloc to generate gpu_addr for BO to make it accessible through the
+ * GART aperture for ISP HW.
+ *
+ * This function is exported to allow the V4L2 isp device external to drm device
+ * to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo,
+ u64 *gpu_addr)
+
+{
+ struct drm_gem_object *gem_obj;
+ int r;
+
+ gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf);
+ *bo = gem_to_amdgpu_bo(gem_obj);
+ if (!(*bo)) {
+ dev_err(adev->dev, "failed to get valid isp user bo\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_reserve(*bo, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_pin(*bo, domain);
+ if (r) {
+ dev_err(adev->dev, "(%d) isp user bo pin failed\n", r);
+ goto error_unreserve;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(*bo)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo);
+ goto error_unpin;
+ }
+
+ if (!WARN_ON(!gpu_addr))
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo);
+
+ amdgpu_bo_unreserve(*bo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(*bo);
+error_unreserve:
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
+
+ return r;
+}
+
+/**
* amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
*
* @adev: amdgpu device object
* @offset: offset of the BO
* @size: size of the BO
- * @domain: where to place it
* @bo_ptr: used to initialize BOs in structures
* @cpu_addr: optional CPU address mapping
*
- * Creates a kernel BO at a specific offset in the address space of the domain.
+ * Creates a kernel BO at a specific offset in VRAM.
*
* Returns:
* 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size, uint32_t domain,
+ uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr)
{
struct ttm_operation_ctx ctx = { false, false };
@@ -365,8 +454,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
offset &= PAGE_MASK;
size = ALIGN(size, PAGE_SIZE);
- r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
- NULL, cpu_addr);
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+ cpu_addr);
if (r)
return r;
@@ -380,14 +470,14 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
if (cpu_addr)
amdgpu_bo_kunmap(*bo_ptr);
- ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem);
+ ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
(*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
(*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
}
r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
- &(*bo_ptr)->tbo.mem, &ctx);
+ &(*bo_ptr)->tbo.resource, &ctx);
if (r)
goto error;
@@ -414,6 +504,9 @@ error:
* @cpu_addr: pointer to where the BO's CPU memory space address was stored
*
* unmaps and unpin a BO for kernel internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the kernel BO.
*/
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr)
@@ -421,6 +514,8 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
if (*bo == NULL)
return;
+ WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+
if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
if (cpu_addr)
amdgpu_bo_kunmap(*bo);
@@ -437,7 +532,29 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
*cpu_addr = NULL;
}
-/* Validate bo size is bit bigger then the request domain */
+/**
+ * amdgpu_bo_free_isp_user - free BO for isp use
+ *
+ * @bo: amdgpu isp user BO to free
+ *
+ * unpin and unref BO for isp internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the isp user BO.
+ */
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo)
+{
+ if (bo == NULL)
+ return;
+
+ if (amdgpu_bo_reserve(bo, true) == 0) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ }
+ amdgpu_bo_unref(&bo);
+}
+
+/* Validate bo size is bit bigger than the request domain */
static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
unsigned long size, u32 domain)
{
@@ -445,33 +562,26 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
/*
* If GTT is part of requested domains the check must succeed to
- * allow fall back to GTT
+ * allow fall back to GTT.
*/
- if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
-
- if (size < (man->size << PAGE_SHIFT))
- return true;
- else
- goto fail;
- }
-
- if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+ else if (domain & AMDGPU_GEM_DOMAIN_VRAM)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ else
+ return true;
- if (size < (man->size << PAGE_SHIFT))
- return true;
- else
- goto fail;
+ if (!man) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
+ return false;
}
+ /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */
+ if (size < man->size)
+ return true;
- /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
- return true;
-
-fail:
- DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
- man->size << PAGE_SHIFT);
+ DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size);
return false;
}
@@ -509,7 +619,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags)
#endif
}
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+/**
+ * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @bo_ptr: pointer to the buffer object pointer
+ *
+ * Creates an &amdgpu_bo buffer object.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
@@ -523,7 +644,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
};
struct amdgpu_bo *bo;
unsigned long page_align, size = bp->size;
- size_t acc_size;
int r;
/* Note that GDS/GWS/OA allocates 1 page per byte/resource. */
@@ -531,6 +651,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
/* GWS and OA don't need any alignment. */
page_align = bp->byte_align;
size <<= PAGE_SHIFT;
+
} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
/* Both size and alignment must be a multiple of 4. */
page_align = ALIGN(bp->byte_align, 4);
@@ -544,26 +665,32 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
- *bo_ptr = NULL;
-
- acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
- sizeof(struct amdgpu_bo));
+ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo));
- bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
+ *bo_ptr = NULL;
+ bo = kvzalloc(bp->bo_ptr_size, GFP_KERNEL);
if (bo == NULL)
return -ENOMEM;
drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
- INIT_LIST_HEAD(&bo->shadow_list);
+ bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain;
bo->allowed_domains = bo->preferred_domains;
if (bp->type != ttm_bo_type_kernel &&
+ !(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) &&
bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
bo->flags = bp->flags;
+ if (adev->gmc.mem_partitions)
+ /* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */
+ bo->xcp_id = bp->xcp_id_plus1 - 1;
+ else
+ /* For GPUs without spatial partitioning */
+ bo->xcp_id = 0;
+
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@@ -574,33 +701,36 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
else
amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 2;
+ else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE))
bo->tbo.priority = 1;
- r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
- &bo->placement, page_align, &ctx, acc_size,
- NULL, bp->resv, &amdgpu_bo_destroy);
+ if (!bp->destroy)
+ bp->destroy = &amdgpu_bo_destroy;
+
+ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, bp->type,
+ &bo->placement, page_align, &ctx, NULL,
+ bp->resv, bp->destroy);
if (unlikely(r != 0))
return r;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- bo->tbo.mem.mem_type == TTM_PL_VRAM &&
- bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
ctx.bytes_moved);
else
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
- bo->tbo.mem.mem_type == TTM_PL_VRAM) {
+ bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);
+ r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
- amdgpu_bo_fence(bo, fence, false);
- dma_fence_put(bo->tbo.moving);
- bo->tbo.moving = dma_fence_get(fence);
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
}
if (!bp->resv)
@@ -622,141 +752,67 @@ fail_unreserve:
return r;
}
-static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
- unsigned long size,
- struct amdgpu_bo *bo)
-{
- struct amdgpu_bo_param bp;
- int r;
-
- if (bo->shadow)
- return 0;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = size;
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
- AMDGPU_GEM_CREATE_SHADOW;
- bp.type = ttm_bo_type_kernel;
- bp.resv = bo->tbo.base.resv;
-
- r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
- if (!r) {
- bo->shadow->parent = amdgpu_bo_ref(bo);
- mutex_lock(&adev->shadow_list_lock);
- list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
- mutex_unlock(&adev->shadow_list_lock);
- }
-
- return r;
-}
-
/**
- * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
* @adev: amdgpu device object
* @bp: parameters to be used for the buffer object
- * @bo_ptr: pointer to the buffer object pointer
+ * @ubo_ptr: pointer to the buffer object pointer
*
- * Creates an &amdgpu_bo buffer object; and if requested, also creates a
- * shadow object.
- * Shadow object is used to backup the original buffer object, and is always
- * in GTT.
+ * Create a BO to be used by user application;
*
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_create(struct amdgpu_device *adev,
- struct amdgpu_bo_param *bp,
- struct amdgpu_bo **bo_ptr)
+
+int amdgpu_bo_create_user(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_user **ubo_ptr)
{
- u64 flags = bp->flags;
+ struct amdgpu_bo *bo_ptr;
int r;
- bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
- r = amdgpu_bo_do_create(adev, bp, bo_ptr);
+ bp->bo_ptr_size = sizeof(struct amdgpu_bo_user);
+ bp->destroy = &amdgpu_bo_user_destroy;
+ r = amdgpu_bo_create(adev, bp, &bo_ptr);
if (r)
return r;
- if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
- if (!bp->resv)
- WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,
- NULL));
-
- r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
-
- if (!bp->resv)
- dma_resv_unlock((*bo_ptr)->tbo.base.resv);
-
- if (r)
- amdgpu_bo_unref(bo_ptr);
- }
-
+ *ubo_ptr = to_amdgpu_bo_user(bo_ptr);
return r;
}
/**
- * amdgpu_bo_validate - validate an &amdgpu_bo buffer object
- * @bo: pointer to the buffer object
+ * amdgpu_bo_create_vm - create an &amdgpu_bo_vm buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @vmbo_ptr: pointer to the buffer object pointer
*
- * Sets placement according to domain; and changes placement and caching
- * policy of the buffer object according to the placement.
- * This is used for validating shadow bos. It calls ttm_bo_validate() to
- * make sure the buffer is resident where it needs to be.
+ * Create a BO to be for GPUVM.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_validate(struct amdgpu_bo *bo)
+
+int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_vm **vmbo_ptr)
{
- struct ttm_operation_ctx ctx = { false, false };
- uint32_t domain;
+ struct amdgpu_bo *bo_ptr;
int r;
- if (bo->tbo.pin_count)
- return 0;
-
- domain = bo->preferred_domains;
-
-retry:
- amdgpu_bo_placement_from_domain(bo, domain);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
- domain = bo->allowed_domains;
- goto retry;
- }
+ /* bo_ptr_size will be determined by the caller and it depends on
+ * num of amdgpu_vm_pt entries.
+ */
+ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm));
+ r = amdgpu_bo_create(adev, bp, &bo_ptr);
+ if (r)
+ return r;
+ *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
return r;
}
/**
- * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
- *
- * @shadow: &amdgpu_bo shadow to be restored
- * @fence: dma_fence associated with the operation
- *
- * Copies a buffer object's shadow content back to the object.
- * This is used for recovering a buffer from its shadow in case of a gpu
- * reset where vram context may be lost.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
-
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t shadow_addr, parent_addr;
-
- shadow_addr = amdgpu_bo_gpu_offset(shadow);
- parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
-
- return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
- amdgpu_bo_size(shadow), NULL, fence,
- true, false, false);
-}
-
-/**
* amdgpu_bo_kmap - map an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be mapped
* @ptr: kernel virtual address to be returned
@@ -775,6 +831,11 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
return -EPERM;
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
kptr = amdgpu_bo_kptr(bo);
if (kptr) {
if (ptr)
@@ -782,12 +843,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
return 0;
}
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false,
- MAX_SCHEDULE_TIMEOUT);
- if (r < 0)
- return r;
-
- r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.mem.num_pages, &bo->kmap);
+ r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap);
if (r)
return r;
@@ -839,7 +895,7 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
if (bo == NULL)
return NULL;
- ttm_bo_get(&bo->tbo);
+ drm_gem_object_get(&bo->tbo.base);
return bo;
}
@@ -851,40 +907,30 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
*/
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
- struct ttm_buffer_object *tbo;
-
if ((*bo) == NULL)
return;
- tbo = &((*bo)->tbo);
- ttm_bo_put(tbo);
+ drm_gem_object_put(&(*bo)->tbo.base);
*bo = NULL;
}
/**
- * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
+ * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be pinned
* @domain: domain to be pinned to
- * @min_offset: the start of requested address range
- * @max_offset: the end of requested address range
*
- * Pins the buffer object according to requested domain and address range. If
- * the memory is unbound gart memory, binds the pages into gart table. Adjusts
- * pin_count and pin_size accordingly.
+ * Pins the buffer object according to requested domain. If the memory is
+ * unbound gart memory, binds the pages into gart table. Adjusts pin_count and
+ * pin_size accordingly.
*
* Pinning means to lock pages in memory along with keeping them at a fixed
* offset. It is required when a buffer can not be moved, for example, when
* a display buffer is being scanned out.
*
- * Compared with amdgpu_bo_pin(), this function gives more flexibility on
- * where to pin a buffer if there are specific restrictions on where a buffer
- * must be located.
- *
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset)
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
@@ -893,25 +939,21 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return -EPERM;
- if (WARN_ON_ONCE(min_offset > max_offset))
- return -EINVAL;
+ /* Check domain to be pinned to against preferred domains */
+ if (bo->preferred_domains & domain)
+ domain = bo->preferred_domains & domain;
/* A shared bo cannot be migrated to VRAM */
- if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+ if (drm_gem_is_imported(&bo->tbo.base)) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
domain = AMDGPU_GEM_DOMAIN_GTT;
else
return -EINVAL;
}
- /* This assumes only APU display buffers are pinned with (VRAM|GTT).
- * See function amdgpu_display_supported_domains()
- */
- domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
-
if (bo->tbo.pin_count) {
- uint32_t mem_type = bo->tbo.mem.mem_type;
- uint32_t mem_flags = bo->tbo.mem.placement;
+ uint32_t mem_type = bo->tbo.resource->mem_type;
+ uint32_t mem_flags = bo->tbo.resource->placement;
if (!(domain & amdgpu_mem_type_to_domain(mem_type)))
return -EINVAL;
@@ -922,18 +964,15 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
ttm_bo_pin(&bo->tbo);
-
- if (max_offset != 0) {
- u64 domain_start = amdgpu_ttm_domain_start(adev,
- mem_type);
- WARN_ON_ONCE(max_offset <
- (amdgpu_bo_gpu_offset(bo) - domain_start));
- }
-
return 0;
}
- if (bo->tbo.base.import_attach)
+ /* This assumes only APU display buffers are pinned with (VRAM|GTT).
+ * See function amdgpu_display_supported_domains()
+ */
+ domain = amdgpu_bo_get_preferred_domain(adev, domain);
+
+ if (drm_gem_is_imported(&bo->tbo.base))
dma_buf_pin(bo->tbo.base.import_attach);
/* force to pin into visible video ram */
@@ -941,16 +980,9 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
- unsigned fpfn, lpfn;
-
- fpfn = min_offset >> PAGE_SHIFT;
- lpfn = max_offset >> PAGE_SHIFT;
-
- if (fpfn > bo->placements[i].fpfn)
- bo->placements[i].fpfn = fpfn;
- if (!bo->placements[i].lpfn ||
- (lpfn && lpfn < bo->placements[i].lpfn))
- bo->placements[i].lpfn = lpfn;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+ bo->placements[i].mem_type == TTM_PL_VRAM)
+ bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -961,12 +993,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
ttm_bo_pin(&bo->tbo);
- domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
- if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
&adev->visible_pin_size);
- } else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+ } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
@@ -975,24 +1006,6 @@ error:
}
/**
- * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
- * @bo: &amdgpu_bo buffer object to be pinned
- * @domain: domain to be pinned to
- *
- * A simple wrapper to amdgpu_bo_pin_restricted().
- * Provides a simpler API for buffers that do not have any strict restrictions
- * on where a buffer must be located.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
-{
- bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
-}
-
-/**
* amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be unpinned
*
@@ -1004,43 +1017,26 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
*/
void amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
ttm_bo_unpin(&bo->tbo);
if (bo->tbo.pin_count)
return;
- amdgpu_bo_subtract_pin_size(bo);
-
- if (bo->tbo.base.import_attach)
+ if (drm_gem_is_imported(&bo->tbo.base))
dma_buf_unpin(bo->tbo.base.import_attach);
-}
-/**
- * amdgpu_bo_evict_vram - evict VRAM buffers
- * @adev: amdgpu device object
- *
- * Evicts all VRAM buffers on the lru list of the memory type.
- * Mainly used for evicting vram at suspend time.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
-{
- struct ttm_resource_manager *man;
-
- /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
-#ifndef CONFIG_HIBERNATION
- if (adev->flags & AMD_IS_APU) {
- /* Useless to evict on IGP chips */
- return 0;
+ if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
+ } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
-#endif
- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
- return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
}
-static const char *amdgpu_vram_names[] = {
+static const char * const amdgpu_vram_names[] = {
"UNKNOWN",
"GDDR1",
"DDR2",
@@ -1051,7 +1047,10 @@ static const char *amdgpu_vram_names[] = {
"DDR3",
"DDR4",
"GDDR6",
- "DDR5"
+ "DDR5",
+ "LPDDR4",
+ "LPDDR5",
+ "HBM3E"
};
/**
@@ -1065,13 +1064,22 @@ static const char *amdgpu_vram_names[] = {
*/
int amdgpu_bo_init(struct amdgpu_device *adev)
{
- /* reserve PAT memory space to WC for VRAM */
- arch_io_reserve_memtype_wc(adev->gmc.aper_base,
- adev->gmc.aper_size);
+ /* On A+A platform, VRAM can be mapped as WB */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ /* reserve PAT memory space to WC for VRAM */
+ int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
+ adev->gmc.aper_size);
+
+ if (r) {
+ DRM_ERROR("Unable to set WC memtype for the aperture base\n");
+ return r;
+ }
+
+ /* Add an MTRR for the VRAM */
+ adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
+ adev->gmc.aper_size);
+ }
- /* Add an MTRR for the VRAM */
- adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
- adev->gmc.aper_size);
DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
adev->gmc.mc_vram_size >> 20,
(unsigned long long)adev->gmc.aper_size >> 20);
@@ -1088,28 +1096,17 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
*/
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
- amdgpu_ttm_fini(adev);
- arch_phys_wc_del(adev->gmc.vram_mtrr);
- arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
-}
+ int idx;
-/**
- * amdgpu_bo_fbdev_mmap - mmap fbdev memory
- * @bo: &amdgpu_bo buffer object
- * @vma: vma as input from the fbdev mmap method
- *
- * Calls ttm_fbdev_mmap() to mmap fbdev memory if it is backed by a bo.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
- struct vm_area_struct *vma)
-{
- if (vma->vm_pgoff != 0)
- return -EACCES;
+ amdgpu_ttm_fini(adev);
- return ttm_bo_mmap_obj(vma, &bo->tbo);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
+ }
+ drm_dev_exit(idx);
+ }
}
/**
@@ -1126,12 +1123,15 @@ int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_bo_user *ubo;
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
if (adev->family <= AMDGPU_FAMILY_CZ &&
AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
return -EINVAL;
- bo->tiling_flags = tiling_flags;
+ ubo = to_amdgpu_bo_user(bo);
+ ubo->tiling_flags = tiling_flags;
return 0;
}
@@ -1145,10 +1145,14 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
*/
void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
+ struct amdgpu_bo_user *ubo;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
dma_resv_assert_held(bo->tbo.base.resv);
+ ubo = to_amdgpu_bo_user(bo);
if (tiling_flags)
- *tiling_flags = bo->tiling_flags;
+ *tiling_flags = ubo->tiling_flags;
}
/**
@@ -1164,16 +1168,19 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
- uint32_t metadata_size, uint64_t flags)
+int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
+ u32 metadata_size, uint64_t flags)
{
+ struct amdgpu_bo_user *ubo;
void *buffer;
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ ubo = to_amdgpu_bo_user(bo);
if (!metadata_size) {
- if (bo->metadata_size) {
- kfree(bo->metadata);
- bo->metadata = NULL;
- bo->metadata_size = 0;
+ if (ubo->metadata_size) {
+ kfree(ubo->metadata);
+ ubo->metadata = NULL;
+ ubo->metadata_size = 0;
}
return 0;
}
@@ -1185,10 +1192,10 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
if (buffer == NULL)
return -ENOMEM;
- kfree(bo->metadata);
- bo->metadata_flags = flags;
- bo->metadata = buffer;
- bo->metadata_size = metadata_size;
+ kfree(ubo->metadata);
+ ubo->metadata_flags = flags;
+ ubo->metadata = buffer;
+ ubo->metadata_size = metadata_size;
return 0;
}
@@ -1212,21 +1219,26 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
size_t buffer_size, uint32_t *metadata_size,
uint64_t *flags)
{
+ struct amdgpu_bo_user *ubo;
+
if (!buffer && !metadata_size)
return -EINVAL;
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ ubo = to_amdgpu_bo_user(bo);
+ if (metadata_size)
+ *metadata_size = ubo->metadata_size;
+
if (buffer) {
- if (buffer_size < bo->metadata_size)
+ if (buffer_size < ubo->metadata_size)
return -EINVAL;
- if (bo->metadata_size)
- memcpy(buffer, bo->metadata, bo->metadata_size);
+ if (ubo->metadata_size)
+ memcpy(buffer, ubo->metadata, ubo->metadata_size);
}
- if (metadata_size)
- *metadata_size = bo->metadata_size;
if (flags)
- *flags = bo->metadata_flags;
+ *flags = ubo->metadata_flags;
return 0;
}
@@ -1235,7 +1247,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
* amdgpu_bo_move_notify - notification about a memory move
* @bo: pointer to a buffer object
* @evict: if this move is evicting the buffer from the graphics address space
- * @new_mem: new information of the bufer object
+ * @new_mem: new resource for backing the BO
*
* Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
* bookkeeping.
@@ -1245,32 +1257,24 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_resource *new_mem)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct ttm_resource *old_mem = bo->resource;
struct amdgpu_bo *abo;
- struct ttm_resource *old_mem = &bo->mem;
if (!amdgpu_bo_is_amdgpu_bo(bo))
return;
abo = ttm_to_amdgpu_bo(bo);
- amdgpu_vm_bo_invalidate(adev, abo, evict);
+ amdgpu_vm_bo_move(abo, new_mem, evict);
amdgpu_bo_kunmap(abo);
- if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach &&
- bo->mem.mem_type != TTM_PL_SYSTEM)
+ if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) &&
+ old_mem && old_mem->mem_type != TTM_PL_SYSTEM)
dma_buf_move_notify(abo->tbo.base.dma_buf);
- /* remember the eviction */
- if (evict)
- atomic64_inc(&adev->num_evictions);
-
- /* update statistics */
- if (!new_mem)
- return;
-
/* move_notify is called before move happens */
- trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
+ trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
+ old_mem ? old_mem->mem_type : -1);
}
/**
@@ -1282,6 +1286,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
*/
void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct dma_fence *fence = NULL;
struct amdgpu_bo *abo;
int r;
@@ -1291,28 +1296,42 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
abo = ttm_to_amdgpu_bo(bo);
- if (abo->kfd_bo)
- amdgpu_amdkfd_unreserve_memory_limit(abo);
+ WARN_ON(abo->vm_bo);
- /* We only remove the fence if the resv has individualized. */
- WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
- && bo->base.resv != &bo->base._resv);
- if (bo->base.resv == &bo->base._resv)
- amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+ if (abo->kfd_bo)
+ amdgpu_amdkfd_release_notify(abo);
- if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
- !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
+ /*
+ * We lock the private dma_resv object here and since the BO is about to
+ * be released nobody else should have a pointer to it.
+ * So when this locking here fails something is wrong with the reference
+ * counting.
+ */
+ if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv)))
return;
- dma_resv_lock(bo->base.resv, NULL);
+ amdgpu_amdkfd_remove_all_eviction_fences(abo);
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
- if (!WARN_ON(r)) {
- amdgpu_bo_fence(abo, fence, false);
- dma_fence_put(fence);
- }
+ if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM ||
+ !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) ||
+ adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev)))
+ goto out;
- dma_resv_unlock(bo->base.resv);
+ r = dma_resv_reserve_fences(&bo->base._resv, 1);
+ if (r)
+ goto out;
+
+ r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
+ AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ if (WARN_ON(r))
+ goto out;
+
+ amdgpu_vram_mgr_set_cleared(bo->resource);
+ dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+
+out:
+ dma_resv_unlock(&bo->base._resv);
}
/**
@@ -1331,18 +1350,12 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
- unsigned long offset, size;
int r;
/* Remember that this BO was accessed by the CPU */
abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (bo->mem.mem_type != TTM_PL_VRAM)
- return 0;
-
- size = bo->mem.num_pages << PAGE_SHIFT;
- offset = bo->mem.start << PAGE_SHIFT;
- if ((offset + size) <= adev->gmc.visible_vram_size)
+ if (amdgpu_res_cpu_visible(adev, bo->resource))
return 0;
/* Can't move a pinned BO to visible VRAM */
@@ -1355,8 +1368,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
AMDGPU_GEM_DOMAIN_GTT);
/* Avoid costly evictions; only set GTT as a busy placement */
- abo->placement.num_busy_placement = 1;
- abo->placement.busy_placement = &abo->placements[1];
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
r = ttm_bo_validate(bo, &abo->placement, &ctx);
if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
@@ -1364,10 +1376,9 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
else if (unlikely(r))
return VM_FAULT_SIGBUS;
- offset = bo->mem.start << PAGE_SHIFT;
/* this should never happen */
- if (bo->mem.mem_type == TTM_PL_VRAM &&
- (offset + size) > adev->gmc.visible_vram_size)
+ if (bo->resource->mem_type == TTM_PL_VRAM &&
+ !amdgpu_res_cpu_visible(adev, bo->resource))
return VM_FAULT_SIGBUS;
ttm_bo_move_to_lru_tail_unlocked(bo);
@@ -1386,11 +1397,17 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
bool shared)
{
struct dma_resv *resv = bo->tbo.base.resv;
+ int r;
- if (shared)
- dma_resv_add_shared_fence(resv, fence);
- else
- dma_resv_add_excl_fence(resv, fence);
+ r = dma_resv_reserve_fences(resv, 1);
+ if (r) {
+ /* As last resort on OOM we block for the fence */
+ dma_fence_wait(fence, false);
+ return;
+ }
+
+ dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
+ DMA_RESV_USAGE_WRITE);
}
/**
@@ -1451,17 +1468,37 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
*/
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
- WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
+ WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_SYSTEM);
WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) &&
!bo->tbo.pin_count && bo->tbo.type != ttm_bo_type_kernel);
- WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
- WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+ WARN_ON_ONCE(bo->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET);
+ WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_VRAM &&
!(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
return amdgpu_bo_gpu_offset_no_check(bo);
}
/**
+ * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo
+ * @bo: amdgpu VRAM buffer object for which we query the offset
+ *
+ * Returns:
+ * current FB aperture GPU offset of the object.
+ */
+u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t offset, fb_base;
+
+ WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM);
+
+ fb_base = adev->gmc.fb_start;
+ fb_base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) + fb_base;
+ return amdgpu_gmc_sign_extend(offset);
+}
+
+/**
* amdgpu_bo_gpu_offset_no_check - return GPU offset of bo
* @bo: amdgpu object for which we query the offset
*
@@ -1471,26 +1508,72 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- uint64_t offset;
+ uint64_t offset = AMDGPU_BO_INVALID_OFFSET;
- offset = (bo->tbo.mem.start << PAGE_SHIFT) +
- amdgpu_ttm_domain_start(adev, bo->tbo.mem.mem_type);
+ if (bo->tbo.resource->mem_type == TTM_PL_TT)
+ offset = amdgpu_gmc_agp_addr(&bo->tbo);
+
+ if (offset == AMDGPU_BO_INVALID_OFFSET)
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) +
+ amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
return amdgpu_gmc_sign_extend(offset);
}
/**
- * amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout
+ * amdgpu_bo_mem_stats_placement - bo placement for memory accounting
+ * @bo: the buffer object we should look at
+ *
+ * BO can have multiple preferred placements, to avoid double counting we want
+ * to file it under a single placement for memory stats.
+ * Luckily, if we take the highest set bit in preferred_domains the result is
+ * quite sensible.
+ *
+ * Returns:
+ * Which of the placements should the BO be accounted under.
+ */
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
+{
+ uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
+
+ if (!domain)
+ return TTM_PL_SYSTEM;
+
+ switch (rounddown_pow_of_two(domain)) {
+ case AMDGPU_GEM_DOMAIN_CPU:
+ return TTM_PL_SYSTEM;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ return TTM_PL_TT;
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ return TTM_PL_VRAM;
+ case AMDGPU_GEM_DOMAIN_GDS:
+ return AMDGPU_PL_GDS;
+ case AMDGPU_GEM_DOMAIN_GWS:
+ return AMDGPU_PL_GWS;
+ case AMDGPU_GEM_DOMAIN_OA:
+ return AMDGPU_PL_OA;
+ case AMDGPU_GEM_DOMAIN_DOORBELL:
+ return AMDGPU_PL_DOORBELL;
+ case AMDGPU_GEM_DOMAIN_MMIO_REMAP:
+ return AMDGPU_PL_MMIO_REMAP;
+ default:
+ return TTM_PL_SYSTEM;
+ }
+}
+
+/**
+ * amdgpu_bo_get_preferred_domain - get preferred domain
* @adev: amdgpu device object
* @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
*
* Returns:
- * Which of the allowed domains is preferred for pinning the BO for scanout.
+ * Which of the allowed domains is preferred for allocating the BO.
*/
-uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
+uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain)
{
- if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+ if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) &&
+ ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) {
domain = AMDGPU_GEM_DOMAIN_VRAM;
if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
domain = AMDGPU_GEM_DOMAIN_GTT;
@@ -1520,25 +1603,54 @@ uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
*/
u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct dma_buf_attachment *attachment;
struct dma_buf *dma_buf;
- unsigned int domain;
const char *placement;
unsigned int pin_count;
u64 size;
- domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
- switch (domain) {
- case AMDGPU_GEM_DOMAIN_VRAM:
- placement = "VRAM";
- break;
- case AMDGPU_GEM_DOMAIN_GTT:
- placement = " GTT";
- break;
- case AMDGPU_GEM_DOMAIN_CPU:
- default:
- placement = " CPU";
- break;
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ if (!bo->tbo.resource) {
+ placement = "NONE";
+ } else {
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_VRAM:
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ placement = "VRAM VISIBLE";
+ else
+ placement = "VRAM";
+ break;
+ case TTM_PL_TT:
+ placement = "GTT";
+ break;
+ case AMDGPU_PL_GDS:
+ placement = "GDS";
+ break;
+ case AMDGPU_PL_GWS:
+ placement = "GWS";
+ break;
+ case AMDGPU_PL_OA:
+ placement = "OA";
+ break;
+ case AMDGPU_PL_PREEMPT:
+ placement = "PREEMPTIBLE";
+ break;
+ case AMDGPU_PL_DOORBELL:
+ placement = "DOORBELL";
+ break;
+ case AMDGPU_PL_MMIO_REMAP:
+ placement = "MMIO REMAP";
+ break;
+ case TTM_PL_SYSTEM:
+ default:
+ placement = "CPU";
+ break;
+ }
+ }
+ dma_resv_unlock(bo->tbo.base.resv);
+ } else {
+ placement = "UNKNOWN";
}
size = amdgpu_bo_size(bo);
@@ -1553,19 +1665,22 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
attachment = READ_ONCE(bo->tbo.base.import_attach);
if (attachment)
- seq_printf(m, " imported from %p", dma_buf);
+ seq_printf(m, " imported from ino:%lu", file_inode(dma_buf->file)->i_ino);
else if (dma_buf)
- seq_printf(m, " exported as %p", dma_buf);
+ seq_printf(m, " exported as ino:%lu", file_inode(dma_buf->file)->i_ino);
amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
- amdgpu_bo_print_flag(m, bo, SHADOW);
amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
-
+ /* Add the gem obj resv fence dump*/
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ dma_resv_describe(bo->tbo.base.resv, m);
+ dma_resv_unlock(bo->tbo.base.resv);
+ }
seq_puts(m, "\n");
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 9ac37569823f..52c2d1731aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -30,6 +30,8 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#include "amdgpu_res_cursor.h"
+
#ifdef CONFIG_MMU_NOTIFIER
#include <linux/mmu_notifier.h>
#endif
@@ -37,15 +39,25 @@
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63)
+
+#define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
+#define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)
+
struct amdgpu_bo_param {
unsigned long size;
int byte_align;
+ u32 bo_ptr_size;
u32 domain;
u32 preferred_domain;
u64 flags;
enum ttm_bo_type type;
bool no_wait_gpu;
- struct dma_resv *resv;
+ struct dma_resv *resv;
+ void (*destroy)(struct ttm_buffer_object *bo);
+ /* xcp partition number plus 1, 0 means any partition */
+ int8_t xcp_id_plus1;
};
/* bo virtual addresses in a vm */
@@ -57,7 +69,7 @@ struct amdgpu_bo_va_mapping {
uint64_t last;
uint64_t __subtree_last;
uint64_t offset;
- uint64_t flags;
+ uint32_t flags;
};
/* User space allocated BO in a VM */
@@ -78,6 +90,13 @@ struct amdgpu_bo_va {
bool cleared;
bool is_xgmi;
+
+ /*
+ * protected by vm reservation lock
+ * if non-zero, cannot unmap from GPU because user queues may still access it
+ */
+ unsigned int queue_refcount;
+ atomic_t userq_va_mapped;
};
struct amdgpu_bo {
@@ -89,27 +108,36 @@ struct amdgpu_bo {
struct ttm_buffer_object tbo;
struct ttm_bo_kmap_obj kmap;
u64 flags;
- u64 tiling_flags;
- u64 metadata_flags;
- void *metadata;
- u32 metadata_size;
- unsigned prime_shared_count;
/* per VM structure for page tables and with virtual addresses */
struct amdgpu_vm_bo_base *vm_bo;
/* Constant after initialization */
struct amdgpu_bo *parent;
- struct amdgpu_bo *shadow;
-
- struct amdgpu_mn *mn;
-
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_interval_notifier notifier;
#endif
+ struct kgd_mem *kfd_bo;
- struct list_head shadow_list;
+ /*
+ * For GPUs with spatial partitioning, xcp partition number, -1 means
+ * any partition. For other ASICs without spatial partition, always 0
+ * for memory accounting.
+ */
+ int8_t xcp_id;
+};
- struct kgd_mem *kfd_bo;
+struct amdgpu_bo_user {
+ struct amdgpu_bo bo;
+ u64 tiling_flags;
+ u64 metadata_flags;
+ void *metadata;
+ u32 metadata_size;
+
+};
+
+struct amdgpu_bo_vm {
+ struct amdgpu_bo bo;
+ struct amdgpu_vm_bo_base entries[];
};
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
@@ -138,6 +166,10 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
return AMDGPU_GEM_DOMAIN_GWS;
case AMDGPU_PL_OA:
return AMDGPU_GEM_DOMAIN_OA;
+ case AMDGPU_PL_DOORBELL:
+ return AMDGPU_GEM_DOMAIN_DOORBELL;
+ case AMDGPU_PL_MMIO_REMAP:
+ return AMDGPU_GEM_DOMAIN_MMIO_REMAP;
default:
break;
}
@@ -184,7 +216,7 @@ static inline unsigned amdgpu_bo_ngpu_pages(struct amdgpu_bo *bo)
static inline unsigned amdgpu_bo_gpu_page_alignment(struct amdgpu_bo *bo)
{
- return (bo->tbo.mem.page_alignment << PAGE_SHIFT) / AMDGPU_GPU_PAGE_SIZE;
+ return (bo->tbo.page_alignment << PAGE_SHIFT) / AMDGPU_GPU_PAGE_SIZE;
}
/**
@@ -199,27 +231,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
}
/**
- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
- */
-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
- struct drm_mm_node *node = bo->tbo.mem.mm_node;
- unsigned long pages_left;
-
- if (bo->tbo.mem.mem_type != TTM_PL_VRAM)
- return false;
-
- for (pages_left = bo->tbo.mem.num_pages; pages_left;
- pages_left -= node->size, node++)
- if (node->start < fpfn)
- return true;
-
- return false;
-}
-
-/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
@@ -252,25 +263,31 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dbuf, u32 domain,
+ struct amdgpu_bo **bo,
+ u64 *gpu_addr);
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size, uint32_t domain,
+ uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr);
+int amdgpu_bo_create_user(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_user **ubo_ptr);
+int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_vm **ubo_ptr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset);
void amdgpu_bo_unpin(struct amdgpu_bo *bo);
-int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
-int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
- struct vm_area_struct *vma);
int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags);
void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags);
int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
@@ -290,25 +307,31 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
bool intr);
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
+u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo);
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
-int amdgpu_bo_validate(struct amdgpu_bo *bo);
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
- struct dma_fence **fence);
-uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
+uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain);
/*
* sub allocation
*/
+static inline struct amdgpu_sa_manager *
+to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
+{
+ return container_of(manager, struct amdgpu_sa_manager, base);
+}
-static inline uint64_t amdgpu_sa_bo_gpu_addr(struct amdgpu_sa_bo *sa_bo)
+static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->gpu_addr + sa_bo->soffset;
+ return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
+ drm_suballoc_soffset(sa_bo);
}
-static inline void * amdgpu_sa_bo_cpu_addr(struct amdgpu_sa_bo *sa_bo)
+static inline void *amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->cpu_ptr + sa_bo->soffset;
+ return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
+ drm_suballoc_soffset(sa_bo);
}
int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
@@ -319,17 +342,16 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align);
-void amdgpu_sa_bo_free(struct amdgpu_device *adev,
- struct amdgpu_sa_bo **sa_bo,
- struct dma_fence *fence);
+ struct drm_suballoc **sa_bo,
+ unsigned int size);
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo,
+ struct dma_fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m);
u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m);
#endif
-int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
bool amdgpu_bo_support_uswc(u64 bo_flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index f2e20666c9c1..675aa138ea11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -69,6 +69,7 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den,
/**
* amdgpu_pll_get_fb_ref_div - feedback and ref divider calculation
*
+ * @adev: amdgpu_device pointer
* @nom: nominator
* @den: denominator
* @post_div: post divider
@@ -80,15 +81,20 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den,
* Calculate feedback and reference divider for a given post divider. Makes
* sure we stay within the limits.
*/
-static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div,
- unsigned fb_div_max, unsigned ref_div_max,
- unsigned *fb_div, unsigned *ref_div)
+static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int nom,
+ unsigned int den, unsigned int post_div,
+ unsigned int fb_div_max, unsigned int ref_div_max,
+ unsigned int *fb_div, unsigned int *ref_div)
{
+
/* limit reference * post divider to a maximum */
- ref_div_max = min(128 / post_div, ref_div_max);
+ if (adev->family == AMDGPU_FAMILY_SI)
+ ref_div_max = min(100 / post_div, ref_div_max);
+ else
+ ref_div_max = min(128 / post_div, ref_div_max);
/* get matching reference and feedback divider */
- *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
+ *ref_div = clamp(DIV_ROUND_CLOSEST(den, post_div), 1u, ref_div_max);
*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
/* limit fb divider to its maximum */
@@ -101,6 +107,7 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_
/**
* amdgpu_pll_compute - compute PLL paramaters
*
+ * @adev: amdgpu_device pointer
* @pll: information about the PLL
* @freq: requested frequency
* @dot_clock_p: resulting pixel clock
@@ -112,7 +119,8 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_
* Try to calculate the PLL parameters to generate the given frequency:
* dot_clock = (ref_freq * feedback_div) / (ref_div * post_div)
*/
-void amdgpu_pll_compute(struct amdgpu_pll *pll,
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+ struct amdgpu_pll *pll,
u32 freq,
u32 *dot_clock_p,
u32 *fb_div_p,
@@ -199,7 +207,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll,
for (post_div = post_div_min; post_div <= post_div_max; ++post_div) {
unsigned diff;
- amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max,
+ amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max,
ref_div_max, &fb_div, &ref_div);
diff = abs(target_clock - (pll->reference_freq * fb_div) /
(ref_div * post_div));
@@ -214,7 +222,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll,
post_div = post_div_best;
/* get the feedback and reference divider for the optimal value */
- amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max, ref_div_max,
+ amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max, ref_div_max,
&fb_div, &ref_div);
/* reduce the numbers to a simpler ratio once more */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h
index db6136f68b82..44a583d6c9b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h
@@ -24,7 +24,8 @@
#ifndef __AMDGPU_PLL_H__
#define __AMDGPU_PLL_H__
-void amdgpu_pll_compute(struct amdgpu_pll *pll,
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+ struct amdgpu_pll *pll,
u32 freq,
u32 *dot_clock_p,
u32 *fb_div_p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
index 19c0a3655228..6e91ea1de5aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -233,6 +233,10 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_start))
+ return;
+
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
@@ -268,9 +272,12 @@ static void amdgpu_perf_read(struct perf_event *event)
pmu);
u64 count, prev;
- do {
- prev = local64_read(&hwc->prev_count);
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_get_count))
+ return;
+ prev = local64_read(&hwc->prev_count);
+ do {
switch (hwc->config_base) {
case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
@@ -281,7 +288,7 @@ static void amdgpu_perf_read(struct perf_event *event)
count = 0;
break;
}
- } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev);
+ } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, count));
local64_add(count - prev, &event->count);
}
@@ -297,6 +304,10 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
if (hwc->state & PERF_HES_UPTODATE)
return;
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_stop))
+ return;
+
switch (hwc->config_base) {
case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
@@ -326,6 +337,10 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
struct amdgpu_pmu_entry,
pmu);
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_start))
+ return -EINVAL;
+
switch (pe->pmu_perf_type) {
case AMDGPU_PMU_PERF_TYPE_DF:
hwc->config_base = AMDGPU_PMU_EVENT_CONFIG_TYPE_DF;
@@ -371,6 +386,9 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
struct amdgpu_pmu_entry *pe = container_of(event->pmu,
struct amdgpu_pmu_entry,
pmu);
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_stop))
+ return;
amdgpu_perf_stop(event, PERF_EF_UPDATE);
@@ -519,8 +537,10 @@ static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry,
pmu_entry->pmu.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
GFP_KERNEL);
- if (!pmu_entry->pmu.attr_groups)
+ if (!pmu_entry->pmu.attr_groups) {
+ ret = -ENOMEM;
goto err_attr_group;
+ }
snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", pmu_entry->pmu_file_prefix,
adev_to_drm(pmu_entry->adev)->primary->index);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
new file mode 100644
index 000000000000..34b5e22b44e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2016-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König, Felix Kuehling
+ */
+
+#include "amdgpu.h"
+
+/**
+ * DOC: mem_info_preempt_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total amount of
+ * used preemptible memory.
+ * The file mem_info_preempt_used is used for this, and returns the current
+ * used size of the preemptible block, in bytes
+ */
+static ssize_t mem_info_preempt_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
+}
+
+static DEVICE_ATTR_RO(mem_info_preempt_used);
+
+/**
+ * amdgpu_preempt_mgr_new - allocate a new node
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: TTM memory object
+ *
+ * Dummy, just count the space used without allocating resources or any limit.
+ */
+static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ *res = kzalloc(sizeof(**res), GFP_KERNEL);
+ if (!*res)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, *res);
+ (*res)->start = AMDGPU_BO_INVALID_OFFSET;
+ return 0;
+}
+
+/**
+ * amdgpu_preempt_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @res: TTM memory object
+ *
+ * Free the allocated GTT again.
+ */
+static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ ttm_resource_fini(man, res);
+ kfree(res);
+}
+
+static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = {
+ .alloc = amdgpu_preempt_mgr_new,
+ .free = amdgpu_preempt_mgr_del,
+};
+
+/**
+ * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
+{
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+ int ret;
+
+ man->use_tt = true;
+ man->func = &amdgpu_preempt_mgr_func;
+
+ ttm_resource_manager_init(man, &adev->mman.bdev, (1 << 30));
+
+ ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used);
+ if (ret) {
+ DRM_ERROR("Failed to create device file mem_info_preempt_used\n");
+ return ret;
+ }
+
+ ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, man);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_preempt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
+{
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+ int ret;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 839917eb7bc3..0b10497d487c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -24,24 +24,58 @@
*/
#include <linux/firmware.h>
-#include <linux/dma-mapping.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
+#include "amdgpu_xgmi.h"
#include "soc15_common.h"
#include "psp_v3_1.h"
#include "psp_v10_0.h"
#include "psp_v11_0.h"
+#include "psp_v11_0_8.h"
#include "psp_v12_0.h"
+#include "psp_v13_0.h"
+#include "psp_v13_0_4.h"
+#include "psp_v14_0.h"
#include "amdgpu_ras.h"
#include "amdgpu_securedisplay.h"
+#include "amdgpu_atomfirmware.h"
-static int psp_sysfs_init(struct amdgpu_device *adev);
-static void psp_sysfs_fini(struct amdgpu_device *adev);
+#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*16)
static int psp_load_smu_fw(struct psp_context *psp);
+static int psp_rap_terminate(struct psp_context *psp);
+static int psp_securedisplay_terminate(struct psp_context *psp);
+
+static int psp_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ring = &psp->km_ring;
+
+ ring->ring_type = ring_type;
+
+ /* allocate 4k Page of Local Frame Buffer memory for ring */
+ ring->ring_size = 0x1000;
+ ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+ if (ret) {
+ ring->ring_size = 0;
+ return ret;
+ }
+
+ return 0;
+}
/*
* Due to DF Cstate management centralized to PMFW, the firmware
@@ -56,58 +90,174 @@ static int psp_load_smu_fw(struct psp_context *psp);
* - Load XGMI/RAS/HDCP/DTM TA if any
*
* This new sequence is required for
- * - Arcturus
- * - Navi12 and onwards
+ * - Arcturus and onwards
*/
static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- psp->pmfw_centralized_cstate_management = false;
-
- if (amdgpu_sriov_vf(adev))
- return;
-
- if (adev->flags & AMD_IS_APU)
+ if (amdgpu_sriov_vf(adev)) {
+ psp->pmfw_centralized_cstate_management = false;
return;
+ }
- if ((adev->asic_type == CHIP_ARCTURUS) ||
- (adev->asic_type >= CHIP_NAVI12))
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 7):
psp->pmfw_centralized_cstate_management = true;
+ break;
+ default:
+ psp->pmfw_centralized_cstate_management = false;
+ break;
+ }
}
-static int psp_early_init(void *handle)
+static int psp_init_sriov_microcode(struct psp_context *psp)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int ret = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 9):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 2):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ ret &= psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 0):
+ adev->virt.autoload_ucode_id = 0;
+ break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ ret &= psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 10):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 12):
+ ret = psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return ret;
+}
+
+static int psp_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
+ psp->autoload_supported = true;
+ psp->boot_time_tmr = true;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
psp_v3_1_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
- case CHIP_RAVEN:
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
psp_v10_0_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 4):
psp_v11_0_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 7):
+ adev->psp.sup_pd_fw_up = !amdgpu_sriov_vf(adev);
+ fallthrough;
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
psp_v11_0_set_psp_funcs(psp);
- psp->autoload_supported = true;
+ psp->boot_time_tmr = false;
break;
- case CHIP_RENOIR:
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(12, 0, 1):
psp_v12_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(13, 0, 2):
+ psp->boot_time_tmr = false;
+ fallthrough;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ break;
+ case IP_VERSION(13, 0, 12):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ break;
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(11, 0, 8):
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
+ psp_v11_0_8_set_psp_funcs(psp);
+ }
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ psp_v13_0_set_psp_funcs(psp);
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(13, 0, 4):
+ psp_v13_0_4_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ psp_v14_0_set_psp_funcs(psp);
+ break;
+ case IP_VERSION(14, 0, 5):
+ psp_v14_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
break;
default:
return -EINVAL;
@@ -115,9 +265,52 @@ static int psp_early_init(void *handle)
psp->adev = adev;
+ adev->psp_timeout = 20000;
+
psp_check_pmfw_centralized_cstate_management(psp);
- return 0;
+ if (amdgpu_sriov_vf(adev))
+ return psp_init_sriov_microcode(psp);
+ else
+ return psp_init_microcode(psp);
+}
+
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
+{
+ amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr,
+ &mem_ctx->shared_buf);
+ mem_ctx->shared_bo = NULL;
+}
+
+static void psp_free_shared_bufs(struct psp_context *psp)
+{
+ void *tmr_buf;
+ void **pptr;
+
+ /* free TMR memory buffer */
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+ psp->tmr_bo = NULL;
+
+ /* free xgmi shared memory */
+ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context);
+
+ /* free ras shared memory */
+ psp_ta_free_shared_buf(&psp->ras_context.context.mem_context);
+
+ /* free hdcp shared memory */
+ psp_ta_free_shared_buf(&psp->hdcp_context.context.mem_context);
+
+ /* free dtm shared memory */
+ psp_ta_free_shared_buf(&psp->dtm_context.context.mem_context);
+
+ /* free rap shared memory */
+ psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
+
+ /* free securedisplay shared memory */
+ psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
+
+
}
static void psp_memory_training_fini(struct psp_context *psp)
@@ -135,21 +328,22 @@ static int psp_memory_training_init(struct psp_context *psp)
struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
- DRM_DEBUG("memory training is not supported!\n");
+ dev_dbg(psp->adev->dev, "memory training is not supported!\n");
return 0;
}
ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
if (ctx->sys_cache == NULL) {
- DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+ dev_err(psp->adev->dev, "alloc mem_train_ctx.sys_cache failed!\n");
ret = -ENOMEM;
goto Err_out;
}
- DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
- ctx->train_data_size,
- ctx->p2c_train_data_offset,
- ctx->c2p_train_data_offset);
+ dev_dbg(psp->adev->dev,
+ "train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
return 0;
@@ -158,74 +352,239 @@ Err_out:
return ret;
}
-static int psp_sw_init(void *handle)
+/*
+ * Helper funciton to query psp runtime database entry
+ *
+ * @adev: amdgpu_device pointer
+ * @entry_type: the type of psp runtime database entry
+ * @db_entry: runtime database entry pointer
+ *
+ * Return false if runtime database doesn't exit or entry is invalid
+ * or true if the specific database entry is found, and copy to @db_entry
+ */
+static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
+ enum psp_runtime_entry_type entry_type,
+ void *db_entry)
+{
+ uint64_t db_header_pos, db_dir_pos;
+ struct psp_runtime_data_header db_header = {0};
+ struct psp_runtime_data_directory db_dir = {0};
+ bool ret = false;
+ int i;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))
+ return false;
+
+ db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
+ db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header);
+
+ /* read runtime db header from vram */
+ amdgpu_device_vram_access(adev, db_header_pos, (uint32_t *)&db_header,
+ sizeof(struct psp_runtime_data_header), false);
+
+ if (db_header.cookie != PSP_RUNTIME_DB_COOKIE_ID) {
+ /* runtime db doesn't exist, exit */
+ dev_dbg(adev->dev, "PSP runtime database doesn't exist\n");
+ return false;
+ }
+
+ /* read runtime database entry from vram */
+ amdgpu_device_vram_access(adev, db_dir_pos, (uint32_t *)&db_dir,
+ sizeof(struct psp_runtime_data_directory), false);
+
+ if (db_dir.entry_count >= PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT) {
+ /* invalid db entry count, exit */
+ dev_warn(adev->dev, "Invalid PSP runtime database entry count\n");
+ return false;
+ }
+
+ /* look up for requested entry type */
+ for (i = 0; i < db_dir.entry_count && !ret; i++) {
+ if (db_dir.entry_list[i].entry_type == entry_type) {
+ switch (entry_type) {
+ case PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG:
+ if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_boot_cfg_entry)) {
+ /* invalid db entry size */
+ dev_warn(adev->dev, "Invalid PSP runtime database boot cfg entry size\n");
+ return false;
+ }
+ /* read runtime database entry */
+ amdgpu_device_vram_access(adev, db_header_pos + db_dir.entry_list[i].offset,
+ (uint32_t *)db_entry, sizeof(struct psp_runtime_boot_cfg_entry), false);
+ ret = true;
+ break;
+ case PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS:
+ if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_scpm_entry)) {
+ /* invalid db entry size */
+ dev_warn(adev->dev, "Invalid PSP runtime database scpm entry size\n");
+ return false;
+ }
+ /* read runtime database entry */
+ amdgpu_device_vram_access(adev, db_header_pos + db_dir.entry_list[i].offset,
+ (uint32_t *)db_entry, sizeof(struct psp_runtime_scpm_entry), false);
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+static int psp_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
int ret;
+ struct psp_runtime_boot_cfg_entry boot_cfg_entry;
+ struct psp_memory_training_context *mem_training_ctx = &psp->mem_train_ctx;
+ struct psp_runtime_scpm_entry scpm_entry;
- if (!amdgpu_sriov_vf(adev)) {
- ret = psp_init_microcode(psp);
- if (ret) {
- DRM_ERROR("Failed to load psp firmware!\n");
- return ret;
- }
+ psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!psp->cmd) {
+ dev_err(adev->dev, "Failed to allocate memory to command buffer!\n");
+ return -ENOMEM;
}
- ret = psp_memory_training_init(psp);
- if (ret) {
- DRM_ERROR("Failed to initialize memory training!\n");
- return ret;
+ adev->psp.xgmi_context.supports_extended_data =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2);
+
+ memset(&scpm_entry, 0, sizeof(scpm_entry));
+ if ((psp_get_runtime_db_entry(adev,
+ PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS,
+ &scpm_entry)) &&
+ (scpm_entry.scpm_status != SCPM_DISABLE)) {
+ adev->scpm_enabled = true;
+ adev->scpm_status = scpm_entry.scpm_status;
+ } else {
+ adev->scpm_enabled = false;
+ adev->scpm_status = SCPM_DISABLE;
}
- ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
- if (ret) {
- DRM_ERROR("Failed to process memory training!\n");
- return ret;
+
+ /* TODO: stop gpu driver services and print alarm if scpm is enabled with error status */
+
+ memset(&boot_cfg_entry, 0, sizeof(boot_cfg_entry));
+ if (psp_get_runtime_db_entry(adev,
+ PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG,
+ &boot_cfg_entry)) {
+ psp->boot_cfg_bitmask = boot_cfg_entry.boot_cfg_bitmask;
+ if ((psp->boot_cfg_bitmask) &
+ BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING) {
+ /* If psp runtime database exists, then
+ * only enable two stage memory training
+ * when TWO_STAGE_DRAM_TRAINING bit is set
+ * in runtime database
+ */
+ mem_training_ctx->enable_mem_training = true;
+ }
+
+ } else {
+ /* If psp runtime database doesn't exist or is
+ * invalid, force enable two stage memory training
+ */
+ mem_training_ctx->enable_mem_training = true;
}
- if (adev->asic_type == CHIP_NAVI10 || adev->asic_type == CHIP_SIENNA_CICHLID) {
- ret= psp_sysfs_init(adev);
+ if (mem_training_ctx->enable_mem_training) {
+ ret = psp_memory_training_init(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to initialize memory training!\n");
+ return ret;
+ }
+
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
if (ret) {
+ dev_err(adev->dev, "Failed to process memory training!\n");
return ret;
}
}
+ ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr,
+ &psp->fw_pri_buf);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr,
+ &psp->fence_buf);
+ if (ret)
+ goto failed1;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+ (void **)&psp->cmd_buf_mem);
+ if (ret)
+ goto failed2;
+
return 0;
+
+failed2:
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+failed1:
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+ return ret;
}
-static int psp_sw_fini(void *handle)
+static int psp_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct psp_context *psp = &adev->psp;
- psp_memory_training_fini(&adev->psp);
- if (adev->psp.sos_fw) {
- release_firmware(adev->psp.sos_fw);
- adev->psp.sos_fw = NULL;
- }
- if (adev->psp.asd_fw) {
- release_firmware(adev->psp.asd_fw);
- adev->psp.asd_fw = NULL;
- }
- if (adev->psp.ta_fw) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- }
+ psp_memory_training_fini(psp);
+
+ amdgpu_ucode_release(&psp->sos_fw);
+ amdgpu_ucode_release(&psp->asd_fw);
+ amdgpu_ucode_release(&psp->ta_fw);
+ amdgpu_ucode_release(&psp->cap_fw);
+ amdgpu_ucode_release(&psp->toc_fw);
+
+ kfree(psp->cmd);
+ psp->cmd = NULL;
+
+ psp_free_shared_bufs(psp);
- if (adev->asic_type == CHIP_NAVI10 ||
- adev->asic_type == CHIP_SIENNA_CICHLID)
- psp_sysfs_fini(adev);
+ if (psp->km_ring.ring_mem)
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &psp->km_ring.ring_mem_mc_addr,
+ (void **)&psp->km_ring.ring_mem);
+
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+ amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+ (void **)&psp->cmd_buf_mem);
return 0;
}
-int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
- uint32_t reg_val, uint32_t mask, bool check_changed)
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t reg_val,
+ uint32_t mask, uint32_t flags)
{
+ bool check_changed = flags & PSP_WAITREG_CHANGED;
+ bool verbose = !(flags & PSP_WAITREG_NOVERBOSE);
uint32_t val;
int i;
struct amdgpu_device *adev = psp->adev;
- if (psp->adev->in_pci_err_recovery)
+ if (psp->adev->no_hw_access)
return 0;
for (i = 0; i < adev->usec_timeout; i++) {
@@ -240,9 +599,96 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
udelay(1);
}
+ if (verbose)
+ dev_err(adev->dev,
+ "psp reg (0x%x) wait timed out, mask: %x, read: %x exp: %x",
+ reg_index, mask, val, reg_val);
+
return -ETIME;
}
+int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask, uint32_t msec_timeout)
+{
+ uint32_t val;
+ int i;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ for (i = 0; i < msec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ msleep(1);
+ }
+
+ return -ETIME;
+}
+
+static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
+{
+ switch (cmd_id) {
+ case GFX_CMD_ID_LOAD_TA:
+ return "LOAD_TA";
+ case GFX_CMD_ID_UNLOAD_TA:
+ return "UNLOAD_TA";
+ case GFX_CMD_ID_INVOKE_CMD:
+ return "INVOKE_CMD";
+ case GFX_CMD_ID_LOAD_ASD:
+ return "LOAD_ASD";
+ case GFX_CMD_ID_SETUP_TMR:
+ return "SETUP_TMR";
+ case GFX_CMD_ID_LOAD_IP_FW:
+ return "LOAD_IP_FW";
+ case GFX_CMD_ID_DESTROY_TMR:
+ return "DESTROY_TMR";
+ case GFX_CMD_ID_SAVE_RESTORE:
+ return "SAVE_RESTORE_IP_FW";
+ case GFX_CMD_ID_SETUP_VMR:
+ return "SETUP_VMR";
+ case GFX_CMD_ID_DESTROY_VMR:
+ return "DESTROY_VMR";
+ case GFX_CMD_ID_PROG_REG:
+ return "PROG_REG";
+ case GFX_CMD_ID_GET_FW_ATTESTATION:
+ return "GET_FW_ATTESTATION";
+ case GFX_CMD_ID_LOAD_TOC:
+ return "ID_LOAD_TOC";
+ case GFX_CMD_ID_AUTOLOAD_RLC:
+ return "AUTOLOAD_RLC";
+ case GFX_CMD_ID_BOOT_CFG:
+ return "BOOT_CFG";
+ case GFX_CMD_ID_CONFIG_SQ_PERFMON:
+ return "CONFIG_SQ_PERFMON";
+ case GFX_CMD_ID_FB_FW_RESERV_ADDR:
+ return "FB_FW_RESERV_ADDR";
+ case GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR:
+ return "FB_FW_RESERV_EXT_ADDR";
+ case GFX_CMD_ID_SRIOV_SPATIAL_PART:
+ return "SPATIAL_PARTITION";
+ case GFX_CMD_ID_FB_NPS_MODE:
+ return "NPS_MODE_CHANGE";
+ default:
+ return "UNKNOWN CMD";
+ }
+}
+
+static bool psp_err_warn(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem;
+
+ /* This response indicates reg list is already loaded */
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW &&
+ cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST &&
+ cmd->resp.status == TEE_ERROR_CANCEL)
+ return false;
+
+ return true;
+}
+
static int
psp_cmd_submit_buf(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
@@ -250,15 +696,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
{
int ret;
int index;
- int timeout = 20000;
+ int timeout = psp->adev->psp_timeout;
bool ras_intr = false;
bool skip_unsupport = false;
- if (psp->adev->in_pci_err_recovery)
+ if (psp->adev->no_hw_access)
return 0;
- mutex_lock(&psp->mutex);
-
memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
@@ -267,11 +711,10 @@ psp_cmd_submit_buf(struct psp_context *psp,
ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);
if (ret) {
atomic_dec(&psp->fence_value);
- mutex_unlock(&psp->mutex);
- return ret;
+ goto exit;
}
- amdgpu_asic_invalidate_hdp(psp->adev, NULL);
+ amdgpu_device_invalidate_hdp(psp->adev, NULL);
while (*((unsigned int *)psp->fence_buf) != index) {
if (--timeout == 0)
break;
@@ -284,14 +727,14 @@ psp_cmd_submit_buf(struct psp_context *psp,
if (ras_intr)
break;
usleep_range(10, 100);
- amdgpu_asic_invalidate_hdp(psp->adev, NULL);
+ amdgpu_device_invalidate_hdp(psp->adev, NULL);
}
/* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and PSP_ERR_UNKNOWN_COMMAND in SRIOV */
skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
- memcpy((void*)&cmd->resp, (void*)&psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
+ memcpy(&cmd->resp, &psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
/* In some cases, psp response status is not 0 even there is no
* problem while the command is submitted. Some version of PSP FW
@@ -302,14 +745,23 @@ psp_cmd_submit_buf(struct psp_context *psp,
*/
if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
if (ucode)
- DRM_WARN("failed to load ucode id (%d) ",
- ucode->ucode_id);
- DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n",
- psp->cmd_buf_mem->cmd_id,
- psp->cmd_buf_mem->resp.status);
- if (!timeout) {
- mutex_unlock(&psp->mutex);
- return -EINVAL;
+ dev_warn(psp->adev->dev,
+ "failed to load ucode %s(0x%X) ",
+ amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
+ if (psp_err_warn(psp))
+ dev_warn(
+ psp->adev->dev,
+ "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+ psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
+ /* If any firmware (including CAP) load fails under SRIOV, it should
+ * return failure to stop the VF from initializing.
+ * Also return failure in case of timeout
+ */
+ if ((ucode && amdgpu_sriov_vf(psp->adev)) || !timeout) {
+ ret = -EINVAL;
+ goto exit;
}
}
@@ -317,15 +769,40 @@ psp_cmd_submit_buf(struct psp_context *psp,
ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
}
- mutex_unlock(&psp->mutex);
+exit:
return ret;
}
+static struct psp_gfx_cmd_resp *acquire_psp_cmd_buf(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd;
+
+ mutex_lock(&psp->mutex);
+
+ memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
+
+ return cmd;
+}
+
+static void release_psp_cmd_buf(struct psp_context *psp)
+{
+ mutex_unlock(&psp->mutex);
+}
+
static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
struct psp_gfx_cmd_resp *cmd,
- uint64_t tmr_mc, uint32_t size)
+ uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t size = 0;
+ uint64_t tmr_pa = 0;
+
+ if (tmr_bo) {
+ size = amdgpu_bo_size(tmr_bo);
+ tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
+ }
+
if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
else
@@ -333,6 +810,9 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
cmd->cmd.cmd_setup_tmr.buf_size = size;
+ cmd->cmd.cmd_setup_tmr.bitfield.virt_phy_addr = 1;
+ cmd->cmd.cmd_setup_tmr.system_phy_addr_lo = lower_32_bits(tmr_pa);
+ cmd->cmd.cmd_setup_tmr.system_phy_addr_hi = upper_32_bits(tmr_pa);
}
static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd,
@@ -349,29 +829,27 @@ static int psp_load_toc(struct psp_context *psp,
uint32_t *tmr_size)
{
int ret;
- struct psp_gfx_cmd_resp *cmd;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
/* Copy toc to psp firmware private buffer */
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
- memcpy(psp->fw_pri_buf, psp->toc_start_addr, psp->toc_bin_size);
+ psp_copy_fw(psp, psp->toc.start_addr, psp->toc.size_bytes);
- psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc_bin_size);
+ psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc.size_bytes);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
if (!ret)
*tmr_size = psp->cmd_buf_mem->resp.tmr_size;
- kfree(cmd);
+
+ release_psp_cmd_buf(psp);
+
return ret;
}
/* Set up Trusted Memory Region */
static int psp_tmr_init(struct psp_context *psp)
{
- int ret;
+ int ret = 0;
int tmr_size;
void *tmr_buf;
void **pptr;
@@ -383,54 +861,46 @@ static int psp_tmr_init(struct psp_context *psp)
* Note: this memory need be reserved till the driver
* uninitializes.
*/
- tmr_size = PSP_TMR_SIZE;
+ tmr_size = PSP_TMR_SIZE(psp->adev);
/* For ASICs support RLC autoload, psp will parse the toc
- * and calculate the total size of TMR needed */
+ * and calculate the total size of TMR needed
+ */
if (!amdgpu_sriov_vf(psp->adev) &&
- psp->toc_start_addr &&
- psp->toc_bin_size &&
+ psp->toc.start_addr &&
+ psp->toc.size_bytes &&
psp->fw_pri_buf) {
ret = psp_load_toc(psp, &tmr_size);
if (ret) {
- DRM_ERROR("Failed to load toc\n");
+ dev_err(psp->adev->dev, "Failed to load toc\n");
return ret;
}
}
- pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
- ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->tmr_bo, &psp->tmr_mc_addr, pptr);
-
- return ret;
-}
-
-static int psp_clear_vf_fw(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
- kfree(cmd);
+ if (!psp->tmr_bo && !psp->boot_time_tmr) {
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
+ PSP_TMR_ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->tmr_bo, &psp->tmr_mc_addr,
+ pptr);
+ }
+ if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) && psp->tmr_bo)
+ psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo);
return ret;
}
static bool psp_skip_tmr(struct psp_context *psp)
{
- switch (psp->adev->asic_type) {
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
+ switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
return true;
default:
return false;
@@ -448,19 +918,17 @@ static int psp_tmr_load(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
return 0;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
+ cmd = acquire_psp_cmd_buf(psp);
- psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr,
- amdgpu_bo_size(psp->tmr_bo));
- DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
- amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
+ psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
+ if (psp->tmr_bo)
+ dev_info(psp->adev->dev, "reserve 0x%lx from 0x%llx for PSP TMR\n",
+ amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- kfree(cmd);
+ release_psp_cmd_buf(psp);
return ret;
}
@@ -479,36 +947,28 @@ static int psp_tmr_unload(struct psp_context *psp)
int ret;
struct psp_gfx_cmd_resp *cmd;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
+ /* skip TMR unload for Navi12 and CHIP_SIENNA_CICHLID SRIOV,
+ * as TMR is not loaded at all
+ */
+ if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
psp_prep_tmr_unload_cmd_buf(psp, cmd);
- DRM_INFO("free PSP TMR buffer\n");
+ dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- kfree(cmd);
+ release_psp_cmd_buf(psp);
return ret;
}
static int psp_tmr_terminate(struct psp_context *psp)
{
- int ret;
- void *tmr_buf;
- void **pptr;
-
- ret = psp_tmr_unload(psp);
- if (ret)
- return ret;
-
- /* free TMR memory buffer */
- pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
- amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
-
- return 0;
+ return psp_tmr_unload(psp);
}
int psp_get_fw_attestation_records_addr(struct psp_context *psp,
@@ -523,9 +983,7 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp,
if (amdgpu_sriov_vf(psp->adev))
return 0;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
+ cmd = acquire_psp_cmd_buf(psp);
cmd->cmd_id = GFX_CMD_ID_GET_FW_ATTESTATION;
@@ -537,54 +995,255 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp,
((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_hi << 32);
}
- kfree(cmd);
+ release_psp_cmd_buf(psp);
return ret;
}
-static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint64_t asd_mc, uint32_t size)
+static int psp_get_fw_reservation_info(struct psp_context *psp,
+ uint32_t cmd_id,
+ uint64_t *addr,
+ uint32_t *size)
{
- cmd->cmd_id = GFX_CMD_ID_LOAD_ASD;
- cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc);
- cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc);
- cmd->cmd.cmd_load_ta.app_len = size;
+ int ret;
+ uint32_t status;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = cmd_id;
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = 0;
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = 0;
- cmd->cmd.cmd_load_ta.cmd_buf_len = 0;
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (ret) {
+ release_psp_cmd_buf(psp);
+ return ret;
+ }
+
+ status = cmd->resp.status;
+ if (status == PSP_ERR_UNKNOWN_COMMAND) {
+ release_psp_cmd_buf(psp);
+ *addr = 0;
+ *size = 0;
+ return 0;
+ }
+
+ *addr = (uint64_t)cmd->resp.uresp.fw_reserve_info.reserve_base_address_hi << 32 |
+ cmd->resp.uresp.fw_reserve_info.reserve_base_address_lo;
+ *size = cmd->resp.uresp.fw_reserve_info.reserve_size;
+
+ release_psp_cmd_buf(psp);
+
+ return 0;
}
-static int psp_asd_load(struct psp_context *psp)
+int psp_update_fw_reservation(struct psp_context *psp)
{
int ret;
+ uint64_t reserv_addr, reserv_addr_ext;
+ uint32_t reserv_size, reserv_size_ext, mp0_ip_ver;
+ struct amdgpu_device *adev = psp->adev;
+
+ mp0_ip_ver = amdgpu_ip_version(adev, MP0_HWIP, 0);
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ switch (mp0_ip_ver) {
+ case IP_VERSION(14, 0, 2):
+ if (adev->psp.sos.fw_version < 0x3b0e0d)
+ return 0;
+ break;
+
+ case IP_VERSION(14, 0, 3):
+ if (adev->psp.sos.fw_version < 0x3a0e14)
+ return 0;
+ break;
+
+ default:
+ return 0;
+ }
+
+ ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_ADDR, &reserv_addr, &reserv_size);
+ if (ret)
+ return ret;
+ ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR, &reserv_addr_ext, &reserv_size_ext);
+ if (ret)
+ return ret;
+
+ if (reserv_addr != adev->gmc.real_vram_size - reserv_size) {
+ dev_warn(adev->dev, "reserve fw region is not valid!\n");
+ return 0;
+ }
+
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL);
+
+ reserv_size = roundup(reserv_size, SZ_1M);
+
+ ret = amdgpu_bo_create_kernel_at(adev, reserv_addr, reserv_size, &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ dev_err(adev->dev, "reserve fw region failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL);
+ return ret;
+ }
+
+ reserv_size_ext = roundup(reserv_size_ext, SZ_1M);
+
+ ret = amdgpu_bo_create_kernel_at(adev, reserv_addr_ext, reserv_size_ext,
+ &adev->mman.fw_reserved_memory_extend, NULL);
+ if (ret) {
+ dev_err(adev->dev, "reserve extend fw region failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg)
+{
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_BOOT_CFG;
+ cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_GET;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (!ret) {
+ *boot_cfg =
+ (cmd->resp.uresp.boot_cfg.boot_cfg & BOOT_CONFIG_GECC) ? 1 : 0;
+ }
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_boot_config_set(struct amdgpu_device *adev, uint32_t boot_cfg)
+{
+ int ret;
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_BOOT_CFG;
+ cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_SET;
+ cmd->cmd.boot_cfg.boot_config = boot_cfg;
+ cmd->cmd.boot_cfg.boot_config_valid = boot_cfg;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_rl_load(struct amdgpu_device *adev)
+{
+ int ret;
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (!is_psp_fw_valid(psp->rl))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, psp->rl.start_addr, psp->rl.size_bytes);
+
+ cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(psp->fw_pri_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(psp->fw_pri_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl.size_bytes;
+ cmd->cmd.cmd_load_ip_fw.fw_type = GFX_FW_TYPE_REG_LIST;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_memory_partition(struct psp_context *psp, int mode)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_FB_NPS_MODE;
+ cmd->cmd.cmd_memory_part.mode = mode;
+
+ dev_info(psp->adev->dev,
+ "Requesting %d memory partition change through PSP", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "PSP request failed to change to NPS%d mode\n", mode);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_spatial_partition(struct psp_context *psp, int mode)
+{
struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART;
+ cmd->cmd.cmd_spatial_part.mode = mode;
+
+ dev_info(psp->adev->dev, "Requesting %d partitions through PSP", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_asd_initialize(struct psp_context *psp)
+{
+ int ret;
/* If PSP version doesn't match ASD version, asd loading will be failed.
* add workaround to bypass it for sriov now.
* TODO: add version check to make it common
*/
- if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size)
+ if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes)
return 0;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
- memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size);
-
- psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
- psp->asd_ucode_size);
+ /* bypass asd if display hardware is not available */
+ if (!amdgpu_device_has_display_hardware(psp->adev) &&
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10))
+ return 0;
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
- if (!ret) {
- psp->asd_context.asd_initialized = true;
- psp->asd_context.session_id = cmd->resp.session_id;
- }
+ psp->asd_context.mem_context.shared_mc_addr = 0;
+ psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE;
+ psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD;
- kfree(cmd);
+ ret = psp_ta_load(psp, &psp->asd_context);
+ if (!ret)
+ psp->asd_context.initialized = true;
return ret;
}
@@ -596,29 +1255,35 @@ static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_unload_ta.session_id = session_id;
}
-static int psp_asd_unload(struct psp_context *psp)
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_ta_unload_cmd_buf(cmd, context->session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ context->resp_status = cmd->resp.status;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_asd_terminate(struct psp_context *psp)
{
int ret;
- struct psp_gfx_cmd_resp *cmd;
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->asd_context.asd_initialized)
+ if (!psp->asd_context.initialized)
return 0;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ta_unload_cmd_buf(cmd, psp->asd_context.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
+ ret = psp_ta_unload(psp, &psp->asd_context);
if (!ret)
- psp->asd_context.asd_initialized = false;
-
- kfree(cmd);
+ psp->asd_context.initialized = false;
return ret;
}
@@ -634,54 +1299,53 @@ static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
uint32_t value)
{
- struct psp_gfx_cmd_resp *cmd = NULL;
+ struct psp_gfx_cmd_resp *cmd;
int ret = 0;
if (reg >= PSP_REG_LAST)
return -EINVAL;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
+ cmd = acquire_psp_cmd_buf(psp);
psp_prep_reg_prog_cmd_buf(cmd, reg, value);
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_err(psp->adev->dev, "PSP failed to program reg id %d\n", reg);
+
+ release_psp_cmd_buf(psp);
- kfree(cmd);
return ret;
}
static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint64_t ta_bin_mc,
- uint32_t ta_bin_size,
- uint64_t ta_shared_mc,
- uint32_t ta_shared_size)
+ struct ta_context *context)
{
- cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
- cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
+ cmd->cmd_id = context->ta_load_type;
+ cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);
- cmd->cmd.cmd_load_ta.app_len = ta_bin_size;
+ cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes;
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ta_shared_mc);
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ta_shared_mc);
- cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size;
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo =
+ lower_32_bits(context->mem_context.shared_mc_addr);
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi =
+ upper_32_bits(context->mem_context.shared_mc_addr);
+ cmd->cmd.cmd_load_ta.cmd_buf_len = context->mem_context.shared_mem_size;
}
-static int psp_xgmi_init_shared_buf(struct psp_context *psp)
+int psp_ta_init_shared_buf(struct psp_context *psp,
+ struct ta_mem_context *mem_ctx)
{
- int ret;
-
/*
* Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for xgmi ta <-> Driver
+ * physical) for ta to host memory
*/
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &psp->xgmi_context.xgmi_shared_bo,
- &psp->xgmi_context.xgmi_shared_mc_addr,
- &psp->xgmi_context.xgmi_shared_buf);
-
- return ret;
+ return amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &mem_ctx->shared_bo,
+ &mem_ctx->shared_mc_addr,
+ &mem_ctx->shared_buf);
}
static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
@@ -693,143 +1357,120 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
}
-static int psp_ta_invoke(struct psp_context *psp,
+int psp_ta_invoke(struct psp_context *psp,
uint32_t ta_cmd_id,
- uint32_t session_id)
+ struct ta_context *context)
{
int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
- psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, session_id);
+ psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, context->session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- kfree(cmd);
+ context->resp_status = cmd->resp.status;
+
+ release_psp_cmd_buf(psp);
return ret;
}
-static int psp_xgmi_load(struct psp_context *psp)
+int psp_ta_load(struct psp_context *psp, struct ta_context *context)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
- /*
- * TODO: bypass the loading in sriov for now
- */
+ cmd = acquire_psp_cmd_buf(psp);
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
+ psp_copy_fw(psp, context->bin_desc.start_addr,
+ context->bin_desc.size_bytes);
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
- memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
+ if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) &&
+ context->mem_context.shared_bo)
+ context->mem_context.shared_mc_addr =
+ amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo);
- psp_prep_ta_load_cmd_buf(cmd,
- psp->fw_pri_mc_addr,
- psp->ta_xgmi_ucode_size,
- psp->xgmi_context.xgmi_shared_mc_addr,
- PSP_XGMI_SHARED_MEM_SIZE);
+ psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, context);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- if (!ret) {
- psp->xgmi_context.initialized = 1;
- psp->xgmi_context.session_id = cmd->resp.session_id;
- }
-
- kfree(cmd);
-
- return ret;
-}
-
-static int psp_xgmi_unload(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
- struct amdgpu_device *adev = psp->adev;
+ context->resp_status = cmd->resp.status;
- /* XGMI TA unload currently is not supported on Arcturus */
- if (adev->asic_type == CHIP_ARCTURUS)
- return 0;
-
- /*
- * TODO: bypass the unloading in sriov for now
- */
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
+ if (!ret)
+ context->session_id = cmd->resp.session_id;
- kfree(cmd);
+ release_psp_cmd_buf(psp);
return ret;
}
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
- return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id);
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->xgmi_context.context);
}
int psp_xgmi_terminate(struct psp_context *psp)
{
int ret;
+ struct amdgpu_device *adev = psp->adev;
- if (!psp->xgmi_context.initialized)
+ /* XGMI TA unload currently is not supported on Arcturus/Aldebaran A+A */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) ||
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ adev->gmc.xgmi.connected_to_cpu))
return 0;
- ret = psp_xgmi_unload(psp);
- if (ret)
- return ret;
+ if (!psp->xgmi_context.context.initialized)
+ return 0;
- psp->xgmi_context.initialized = 0;
+ ret = psp_ta_unload(psp, &psp->xgmi_context.context);
- /* free xgmi shared memory */
- amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo,
- &psp->xgmi_context.xgmi_shared_mc_addr,
- &psp->xgmi_context.xgmi_shared_buf);
+ psp->xgmi_context.context.initialized = false;
- return 0;
+ return ret;
}
-int psp_xgmi_initialize(struct psp_context *psp)
+int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta)
{
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
- if (!psp->adev->psp.ta_fw ||
- !psp->adev->psp.ta_xgmi_ucode_size ||
- !psp->adev->psp.ta_xgmi_start_addr)
+ if (!psp->ta_fw ||
+ !psp->xgmi_context.context.bin_desc.size_bytes ||
+ !psp->xgmi_context.context.bin_desc.start_addr)
return -ENOENT;
- if (!psp->xgmi_context.initialized) {
- ret = psp_xgmi_init_shared_buf(psp);
+ if (!load_ta)
+ goto invoke;
+
+ psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE;
+ psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->xgmi_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
if (ret)
return ret;
}
/* Load XGMI TA */
- ret = psp_xgmi_load(psp);
- if (ret)
+ ret = psp_ta_load(psp, &psp->xgmi_context.context);
+ if (!ret)
+ psp->xgmi_context.context.initialized = true;
+ else
return ret;
+invoke:
/* Initialize XGMI session */
- xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf);
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.context.mem_context.shared_buf);
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+ xgmi_cmd->flag_extend_link_record = set_extended_data;
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ /* note down the capbility flag for XGMI TA */
+ psp->xgmi_context.xgmi_ta_caps = xgmi_cmd->caps_flag;
return ret;
}
@@ -839,7 +1480,7 @@ int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id)
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
- xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
@@ -859,7 +1500,7 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
- xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
@@ -874,9 +1515,78 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
return 0;
}
+static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp)
+{
+ return (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 2) &&
+ psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >=
+ IP_VERSION(13, 0, 6);
+}
+
+/*
+ * Chips that support extended topology information require the driver to
+ * reflect topology information in the opposite direction. This is
+ * because the TA has already exceeded its link record limit and if the
+ * TA holds bi-directional information, the driver would have to do
+ * multiple fetches instead of just two.
+ */
+static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
+ struct psp_xgmi_node_info node_info)
+{
+ struct amdgpu_device *mirror_adev;
+ struct amdgpu_hive_info *hive;
+ uint64_t src_node_id = psp->adev->gmc.xgmi.node_id;
+ uint64_t dst_node_id = node_info.node_id;
+ uint8_t dst_num_hops = node_info.num_hops;
+ uint8_t dst_is_sharing_enabled = node_info.is_sharing_enabled;
+ uint8_t dst_num_links = node_info.num_links;
+
+ hive = amdgpu_get_xgmi_hive(psp->adev);
+ if (WARN_ON(!hive))
+ return;
+
+ list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) {
+ struct psp_xgmi_topology_info *mirror_top_info;
+ int j;
+
+ if (mirror_adev->gmc.xgmi.node_id != dst_node_id)
+ continue;
+
+ mirror_top_info = &mirror_adev->psp.xgmi_context.top_info;
+ for (j = 0; j < mirror_top_info->num_nodes; j++) {
+ if (mirror_top_info->nodes[j].node_id != src_node_id)
+ continue;
+
+ mirror_top_info->nodes[j].num_hops = dst_num_hops;
+ mirror_top_info->nodes[j].is_sharing_enabled = dst_is_sharing_enabled;
+ /* prevent 0 num_links value re-reflection since reflection
+ * criteria is based on num_hops (direct or indirect).
+ */
+ if (dst_num_links) {
+ mirror_top_info->nodes[j].num_links = dst_num_links;
+ /* swap src and dst due to frame of reference */
+ for (int k = 0; k < dst_num_links; k++) {
+ mirror_top_info->nodes[j].port_num[k].src_xgmi_port_num =
+ node_info.port_num[k].dst_xgmi_port_num;
+ mirror_top_info->nodes[j].port_num[k].dst_xgmi_port_num =
+ node_info.port_num[k].src_xgmi_port_num;
+ }
+ }
+
+ break;
+ }
+
+ break;
+ }
+
+ amdgpu_put_xgmi_hive(hive);
+}
+
int psp_xgmi_get_topology_info(struct psp_context *psp,
int number_devices,
- struct psp_xgmi_topology_info *topology)
+ struct psp_xgmi_topology_info *topology,
+ bool get_extended_data)
{
struct ta_xgmi_shared_memory *xgmi_cmd;
struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
@@ -887,12 +1597,13 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
return -EINVAL;
- xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+ xgmi_cmd->flag_extend_link_record = get_extended_data;
/* Fill in the shared memory with topology information as input */
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_TOPOLOGY_INFO;
topology_info_input->num_nodes = number_devices;
for (i = 0; i < topology_info_input->num_nodes; i++) {
@@ -903,7 +1614,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
}
/* Invoke xgmi ta to get the topology information */
- ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
+ ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_TOPOLOGY_INFO);
if (ret)
return ret;
@@ -911,10 +1622,83 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
for (i = 0; i < topology->num_nodes; i++) {
- topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
- topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
- topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled;
- topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine;
+ /* extended data will either be 0 or equal to non-extended data */
+ if (topology_info_output->nodes[i].num_hops)
+ topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
+
+ /* non-extended data gets everything here so no need to update */
+ if (!get_extended_data) {
+ topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
+ topology->nodes[i].is_sharing_enabled =
+ topology_info_output->nodes[i].is_sharing_enabled;
+ topology->nodes[i].sdma_engine =
+ topology_info_output->nodes[i].sdma_engine;
+ }
+
+ }
+
+ /* Invoke xgmi ta again to get the link information */
+ if (psp_xgmi_peer_link_info_supported(psp)) {
+ struct ta_xgmi_cmd_get_peer_link_info *link_info_output;
+ struct ta_xgmi_cmd_get_extend_peer_link_info *link_extend_info_output;
+ bool requires_reflection =
+ (psp->xgmi_context.supports_extended_data &&
+ get_extended_data) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 14) ||
+ amdgpu_sriov_vf(psp->adev);
+ bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG ||
+ amdgpu_sriov_xgmi_ta_ext_peer_link_en(psp->adev);
+
+ /* popluate the shared output buffer rather than the cmd input buffer
+ * with node_ids as the input for GET_PEER_LINKS command execution.
+ * This is required for GET_PEER_LINKS per xgmi ta implementation.
+ * The same requirement for GET_EXTEND_PEER_LINKS command.
+ */
+ if (ta_port_num_support) {
+ link_extend_info_output = &xgmi_cmd->xgmi_out_message.get_extend_link_info;
+
+ for (i = 0; i < topology->num_nodes; i++)
+ link_extend_info_output->nodes[i].node_id = topology->nodes[i].node_id;
+
+ link_extend_info_output->num_nodes = topology->num_nodes;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS;
+ } else {
+ link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info;
+
+ for (i = 0; i < topology->num_nodes; i++)
+ link_info_output->nodes[i].node_id = topology->nodes[i].node_id;
+
+ link_info_output->num_nodes = topology->num_nodes;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
+ }
+
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < topology->num_nodes; i++) {
+ uint8_t node_num_links = ta_port_num_support ?
+ link_extend_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links;
+ /* accumulate num_links on extended data */
+ if (get_extended_data) {
+ topology->nodes[i].num_links = topology->nodes[i].num_links + node_num_links;
+ } else {
+ topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
+ topology->nodes[i].num_links : node_num_links;
+ }
+ /* popluate the connected port num info if supported and available */
+ if (ta_port_num_support && topology->nodes[i].num_links) {
+ memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+ sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+ }
+
+ /* reflect the topology information for bi-directionality */
+ if (requires_reflection && topology->nodes[i].num_hops)
+ psp_xgmi_reflect_topology_info(psp, topology->nodes[i]);
+ }
}
return 0;
@@ -931,7 +1715,7 @@ int psp_xgmi_set_topology_info(struct psp_context *psp,
if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
return -EINVAL;
- xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
@@ -950,91 +1734,90 @@ int psp_xgmi_set_topology_info(struct psp_context *psp,
}
// ras begin
-static int psp_ras_init_shared_buf(struct psp_context *psp)
+static void psp_ras_ta_check_status(struct psp_context *psp)
{
- int ret;
+ struct ta_ras_shared_memory *ras_cmd =
+ (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
- /*
- * Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for ras ta <-> Driver
- */
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAS_SHARED_MEM_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &psp->ras.ras_shared_bo,
- &psp->ras.ras_shared_mc_addr,
- &psp->ras.ras_shared_buf);
-
- return ret;
+ switch (ras_cmd->ras_status) {
+ case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: cmd failed due to unsupported ip\n");
+ break;
+ case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: cmd failed due to unsupported error injection\n");
+ break;
+ case TA_RAS_STATUS__SUCCESS:
+ break;
+ case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED:
+ if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR)
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: Inject error to critical region is not allowed\n");
+ break;
+ default:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
+ break;
+ }
}
-static int psp_ras_load(struct psp_context *psp)
+static int psp_ras_send_cmd(struct psp_context *psp,
+ enum ras_command cmd_id, void *in, void *out)
{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
struct ta_ras_shared_memory *ras_cmd;
+ uint32_t cmd = cmd_id;
+ int ret = 0;
- /*
- * TODO: bypass the loading in sriov for now
- */
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
- memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
-
- psp_prep_ta_load_cmd_buf(cmd,
- psp->fw_pri_mc_addr,
- psp->ta_ras_ucode_size,
- psp->ras.ras_shared_mc_addr,
- PSP_RAS_SHARED_MEM_SIZE);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
-
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
+ if (!in)
+ return -EINVAL;
- if (!ret) {
- psp->ras.session_id = cmd->resp.session_id;
+ mutex_lock(&psp->ras_context.mutex);
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
- if (!ras_cmd->ras_status)
- psp->ras.ras_initialized = true;
- else
- dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
+ switch (cmd) {
+ case TA_RAS_COMMAND__ENABLE_FEATURES:
+ case TA_RAS_COMMAND__DISABLE_FEATURES:
+ memcpy(&ras_cmd->ras_in_message,
+ in, sizeof(ras_cmd->ras_in_message));
+ break;
+ case TA_RAS_COMMAND__TRIGGER_ERROR:
+ memcpy(&ras_cmd->ras_in_message.trigger_error,
+ in, sizeof(ras_cmd->ras_in_message.trigger_error));
+ break;
+ case TA_RAS_COMMAND__QUERY_ADDRESS:
+ memcpy(&ras_cmd->ras_in_message.address,
+ in, sizeof(ras_cmd->ras_in_message.address));
+ break;
+ default:
+ dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+ ret = -EINVAL;
+ goto err_out;
}
- if (ret || ras_cmd->ras_status)
- amdgpu_ras_fini(psp->adev);
-
- kfree(cmd);
-
- return ret;
-}
-
-static int psp_ras_unload(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- /*
- * TODO: bypass the unloading in sriov for now
- */
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id);
+ ras_cmd->cmd_id = cmd;
+ ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
+ switch (cmd) {
+ case TA_RAS_COMMAND__TRIGGER_ERROR:
+ if (!ret && out)
+ memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status));
+ break;
+ case TA_RAS_COMMAND__QUERY_ADDRESS:
+ if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+ ret = -EINVAL;
+ else if (out)
+ memcpy(out,
+ &ras_cmd->ras_out_message.address,
+ sizeof(ras_cmd->ras_out_message.address));
+ break;
+ default:
+ break;
+ }
- kfree(cmd);
+err_out:
+ mutex_unlock(&psp->ras_context.mutex);
return ret;
}
@@ -1044,7 +1827,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
struct ta_ras_shared_memory *ras_cmd;
int ret;
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
/*
* TODO: bypass the loading in sriov for now
@@ -1052,14 +1835,13 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id);
+ ret = psp_ta_invoke(psp, ta_cmd_id, &psp->ras_context.context);
if (amdgpu_ras_intr_triggered())
return ret;
- if (ras_cmd->if_version > RAS_TA_HOST_IF_VER)
- {
- DRM_WARN("RAS: Unsupported Interface");
+ if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
+ dev_warn(psp->adev->dev, "RAS: Unsupported Interface\n");
return -EINVAL;
}
@@ -1068,10 +1850,11 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
dev_warn(psp->adev->dev, "ECC switch disabled\n");
ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE;
- }
- else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
+ } else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
dev_warn(psp->adev->dev,
"RAS internal register access blocked\n");
+
+ psp_ras_ta_check_status(psp);
}
return ret;
@@ -1080,30 +1863,22 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)
{
- struct ta_ras_shared_memory *ras_cmd;
+ enum ras_command cmd_id;
int ret;
- if (!psp->ras.ras_initialized)
+ if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
- memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
- if (enable)
- ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
- else
- ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
- ras_cmd->ras_in_message = *info;
-
- ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ cmd_id = enable ?
+ TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES;
+ ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
if (ret)
return -EINVAL;
- return ras_cmd->ras_status;
+ return 0;
}
-static int psp_ras_terminate(struct psp_context *psp)
+int psp_ras_terminate(struct psp_context *psp)
{
int ret;
@@ -1113,135 +1888,199 @@ static int psp_ras_terminate(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->ras.ras_initialized)
+ if (!psp->ras_context.context.initialized)
return 0;
- ret = psp_ras_unload(psp);
- if (ret)
- return ret;
+ ret = psp_ta_unload(psp, &psp->ras_context.context);
- psp->ras.ras_initialized = false;
+ psp->ras_context.context.initialized = false;
- /* free ras shared memory */
- amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo,
- &psp->ras.ras_shared_mc_addr,
- &psp->ras.ras_shared_buf);
+ mutex_destroy(&psp->ras_context.mutex);
- return 0;
+ return ret;
}
-static int psp_ras_initialize(struct psp_context *psp)
+int psp_ras_initialize(struct psp_context *psp)
{
int ret;
+ uint32_t boot_cfg = 0xFF;
+ struct amdgpu_device *adev = psp->adev;
+ struct ta_ras_shared_memory *ras_cmd;
/*
* TODO: bypass the initialize in sriov for now
*/
- if (amdgpu_sriov_vf(psp->adev))
+ if (amdgpu_sriov_vf(adev))
return 0;
- if (!psp->adev->psp.ta_ras_ucode_size ||
- !psp->adev->psp.ta_ras_start_addr) {
- dev_info(psp->adev->dev, "RAS: optional ras ta ucode is not available\n");
+ if (!adev->psp.ras_context.context.bin_desc.size_bytes ||
+ !adev->psp.ras_context.context.bin_desc.start_addr) {
+ dev_info(adev->dev, "RAS: optional ras ta ucode is not available\n");
return 0;
}
- if (!psp->ras.ras_initialized) {
- ret = psp_ras_init_shared_buf(psp);
+ if (amdgpu_atomfirmware_dynamic_boot_config_supported(adev)) {
+ /* query GECC enablement status from boot config
+ * boot_cfg: 1: GECC is enabled or 0: GECC is disabled
+ */
+ ret = psp_boot_config_get(adev, &boot_cfg);
+ if (ret)
+ dev_warn(adev->dev, "PSP get boot config failed\n");
+
+ if (boot_cfg == 1 && !adev->ras_default_ecc_enabled &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ dev_warn(adev->dev, "GECC is currently enabled, which may affect performance\n");
+ dev_warn(adev->dev,
+ "To disable GECC, please reboot the system and load the amdgpu driver with the parameter amdgpu_ras_enable=0\n");
+ } else {
+ if ((adev->ras_default_ecc_enabled || amdgpu_ras_enable == 1) &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ if (boot_cfg == 1) {
+ dev_info(adev->dev, "GECC is enabled\n");
+ } else {
+ /* enable GECC in next boot cycle if it is disabled
+ * in boot config, or force enable GECC if failed to
+ * get boot configuration
+ */
+ ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ }
+ } else {
+ if (!boot_cfg) {
+ if (!adev->ras_default_ecc_enabled &&
+ amdgpu_ras_enable != 1 &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ dev_warn(adev->dev, "GECC is disabled, set amdgpu_ras_enable=1 to enable GECC in next boot cycle if needed\n");
+ else
+ dev_info(adev->dev, "GECC is disabled\n");
+ } else {
+ /* disable GECC in next boot cycle if ras is
+ * disabled by module parameter amdgpu_ras_enable
+ * and/or amdgpu_ras_mask, or boot_config_get call
+ * is failed
+ */
+ ret = psp_boot_config_set(adev, 0);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
+ }
+ }
+ }
+ }
+
+ psp->ras_context.context.mem_context.shared_mem_size = PSP_RAS_SHARED_MEM_SIZE;
+ psp->ras_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->ras_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_ras_load(psp);
- if (ret)
- return ret;
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
- return 0;
+ if (amdgpu_ras_is_poison_mode_supported(adev))
+ ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+ ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
+ ras_cmd->ras_in_message.init_flags.xcc_mask =
+ adev->gfx.xcc_mask;
+ ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2;
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ ras_cmd->ras_in_message.init_flags.nps_mode =
+ adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
+
+ ret = psp_ta_load(psp, &psp->ras_context.context);
+
+ if (!ret && !ras_cmd->ras_status) {
+ psp->ras_context.context.initialized = true;
+ mutex_init(&psp->ras_context.mutex);
+ } else {
+ if (ras_cmd->ras_status)
+ dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
+
+ /* fail to load RAS TA */
+ psp->ras_context.context.initialized = false;
+ }
+
+ return ret;
}
int psp_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info)
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
{
- struct ta_ras_shared_memory *ras_cmd;
+ struct amdgpu_device *adev = psp->adev;
int ret;
+ uint32_t dev_mask;
+ uint32_t ras_status = 0;
- if (!psp->ras.ras_initialized)
+ if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
- memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+ switch (info->block_id) {
+ case TA_RAS_BLOCK__GFX:
+ dev_mask = GET_MASK(GC, instance_mask);
+ break;
+ case TA_RAS_BLOCK__SDMA:
+ dev_mask = GET_MASK(SDMA0, instance_mask);
+ break;
+ case TA_RAS_BLOCK__VCN:
+ case TA_RAS_BLOCK__JPEG:
+ dev_mask = GET_MASK(VCN, instance_mask);
+ break;
+ default:
+ dev_mask = instance_mask;
+ break;
+ }
- ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR;
- ras_cmd->ras_in_message.trigger_error = *info;
+ /* reuse sub_block_index for backward compatibility */
+ dev_mask <<= AMDGPU_RAS_INST_SHIFT;
+ dev_mask &= AMDGPU_RAS_INST_MASK;
+ info->sub_block_index |= dev_mask;
- ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ ret = psp_ras_send_cmd(psp,
+ TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status);
if (ret)
return -EINVAL;
/* If err_event_athub occurs error inject was successful, however
- return status from TA is no long reliable */
+ * return status from TA is no long reliable
+ */
if (amdgpu_ras_intr_triggered())
return 0;
- return ras_cmd->ras_status;
-}
-// ras end
-
-// HDCP start
-static int psp_hdcp_init_shared_buf(struct psp_context *psp)
-{
- int ret;
-
- /*
- * Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for hdcp ta <-> Driver
- */
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &psp->hdcp_context.hdcp_shared_bo,
- &psp->hdcp_context.hdcp_shared_mc_addr,
- &psp->hdcp_context.hdcp_shared_buf);
+ if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
+ return -EACCES;
+ else if (ras_status)
+ return -EINVAL;
- return ret;
+ return 0;
}
-static int psp_hdcp_load(struct psp_context *psp)
+int psp_ras_query_address(struct psp_context *psp,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out)
{
int ret;
- struct psp_gfx_cmd_resp *cmd;
- /*
- * TODO: bypass the loading in sriov for now
- */
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
- memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr,
- psp->ta_hdcp_ucode_size);
-
- psp_prep_ta_load_cmd_buf(cmd,
- psp->fw_pri_mc_addr,
- psp->ta_hdcp_ucode_size,
- psp->hdcp_context.hdcp_shared_mc_addr,
- PSP_HDCP_SHARED_MEM_SIZE);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-
- if (!ret) {
- psp->hdcp_context.hdcp_initialized = true;
- psp->hdcp_context.session_id = cmd->resp.session_id;
- mutex_init(&psp->hdcp_context.mutex);
- }
+ if (!psp->ras_context.context.initialized ||
+ !addr_in || !addr_out)
+ return -EINVAL;
- kfree(cmd);
+ ret = psp_ras_send_cmd(psp,
+ TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out);
return ret;
}
+// ras end
+
+// HDCP start
static int psp_hdcp_initialize(struct psp_context *psp)
{
int ret;
@@ -1252,45 +2091,30 @@ static int psp_hdcp_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->adev->psp.ta_hdcp_ucode_size ||
- !psp->adev->psp.ta_hdcp_start_addr) {
+ /* bypass hdcp initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
+ if (!psp->hdcp_context.context.bin_desc.size_bytes ||
+ !psp->hdcp_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n");
return 0;
}
- if (!psp->hdcp_context.hdcp_initialized) {
- ret = psp_hdcp_init_shared_buf(psp);
+ psp->hdcp_context.context.mem_context.shared_mem_size = PSP_HDCP_SHARED_MEM_SIZE;
+ psp->hdcp_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->hdcp_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->hdcp_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_hdcp_load(psp);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static int psp_hdcp_unload(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- /*
- * TODO: bypass the unloading in sriov for now
- */
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-
- kfree(cmd);
+ ret = psp_ta_load(psp, &psp->hdcp_context.context);
+ if (!ret) {
+ psp->hdcp_context.context.initialized = true;
+ mutex_init(&psp->hdcp_context.mutex);
+ }
return ret;
}
@@ -1303,7 +2127,10 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.session_id);
+ if (!psp->hdcp_context.context.initialized)
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context);
}
static int psp_hdcp_terminate(struct psp_context *psp)
@@ -1316,84 +2143,18 @@ static int psp_hdcp_terminate(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->hdcp_context.hdcp_initialized) {
- if (psp->hdcp_context.hdcp_shared_buf)
- goto out;
- else
- return 0;
- }
-
- ret = psp_hdcp_unload(psp);
- if (ret)
- return ret;
-
- psp->hdcp_context.hdcp_initialized = false;
-
-out:
- /* free hdcp shared memory */
- amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo,
- &psp->hdcp_context.hdcp_shared_mc_addr,
- &psp->hdcp_context.hdcp_shared_buf);
-
- return 0;
-}
-// HDCP end
-
-// DTM start
-static int psp_dtm_init_shared_buf(struct psp_context *psp)
-{
- int ret;
-
- /*
- * Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for dtm ta <-> Driver
- */
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &psp->dtm_context.dtm_shared_bo,
- &psp->dtm_context.dtm_shared_mc_addr,
- &psp->dtm_context.dtm_shared_buf);
-
- return ret;
-}
-
-static int psp_dtm_load(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- /*
- * TODO: bypass the loading in sriov for now
- */
- if (amdgpu_sriov_vf(psp->adev))
+ if (!psp->hdcp_context.context.initialized)
return 0;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
- memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size);
+ ret = psp_ta_unload(psp, &psp->hdcp_context.context);
- psp_prep_ta_load_cmd_buf(cmd,
- psp->fw_pri_mc_addr,
- psp->ta_dtm_ucode_size,
- psp->dtm_context.dtm_shared_mc_addr,
- PSP_DTM_SHARED_MEM_SIZE);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-
- if (!ret) {
- psp->dtm_context.dtm_initialized = true;
- psp->dtm_context.session_id = cmd->resp.session_id;
- mutex_init(&psp->dtm_context.mutex);
- }
-
- kfree(cmd);
+ psp->hdcp_context.context.initialized = false;
return ret;
}
+// HDCP end
+// DTM start
static int psp_dtm_initialize(struct psp_context *psp)
{
int ret;
@@ -1404,45 +2165,30 @@ static int psp_dtm_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->adev->psp.ta_dtm_ucode_size ||
- !psp->adev->psp.ta_dtm_start_addr) {
+ /* bypass dtm initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
+ if (!psp->dtm_context.context.bin_desc.size_bytes ||
+ !psp->dtm_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n");
return 0;
}
- if (!psp->dtm_context.dtm_initialized) {
- ret = psp_dtm_init_shared_buf(psp);
+ psp->dtm_context.context.mem_context.shared_mem_size = PSP_DTM_SHARED_MEM_SIZE;
+ psp->dtm_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->dtm_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->dtm_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_dtm_load(psp);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static int psp_dtm_unload(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- /*
- * TODO: bypass the unloading in sriov for now
- */
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ta_unload_cmd_buf(cmd, psp->dtm_context.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-
- kfree(cmd);
+ ret = psp_ta_load(psp, &psp->dtm_context.context);
+ if (!ret) {
+ psp->dtm_context.context.initialized = true;
+ mutex_init(&psp->dtm_context.mutex);
+ }
return ret;
}
@@ -1455,7 +2201,10 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.session_id);
+ if (!psp->dtm_context.context.initialized)
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context);
}
static int psp_dtm_terminate(struct psp_context *psp)
@@ -1468,99 +2217,22 @@ static int psp_dtm_terminate(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->dtm_context.dtm_initialized) {
- if (psp->dtm_context.dtm_shared_buf)
- goto out;
- else
- return 0;
- }
-
- ret = psp_dtm_unload(psp);
- if (ret)
- return ret;
+ if (!psp->dtm_context.context.initialized)
+ return 0;
- psp->dtm_context.dtm_initialized = false;
+ ret = psp_ta_unload(psp, &psp->dtm_context.context);
-out:
- /* free hdcp shared memory */
- amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo,
- &psp->dtm_context.dtm_shared_mc_addr,
- &psp->dtm_context.dtm_shared_buf);
+ psp->dtm_context.context.initialized = false;
- return 0;
+ return ret;
}
// DTM end
// RAP start
-static int psp_rap_init_shared_buf(struct psp_context *psp)
-{
- int ret;
-
- /*
- * Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for rap ta <-> Driver
- */
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAP_SHARED_MEM_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &psp->rap_context.rap_shared_bo,
- &psp->rap_context.rap_shared_mc_addr,
- &psp->rap_context.rap_shared_buf);
-
- return ret;
-}
-
-static int psp_rap_load(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
- memcpy(psp->fw_pri_buf, psp->ta_rap_start_addr, psp->ta_rap_ucode_size);
-
- psp_prep_ta_load_cmd_buf(cmd,
- psp->fw_pri_mc_addr,
- psp->ta_rap_ucode_size,
- psp->rap_context.rap_shared_mc_addr,
- PSP_RAP_SHARED_MEM_SIZE);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-
- if (!ret) {
- psp->rap_context.rap_initialized = true;
- psp->rap_context.session_id = cmd->resp.session_id;
- mutex_init(&psp->rap_context.mutex);
- }
-
- kfree(cmd);
-
- return ret;
-}
-
-static int psp_rap_unload(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ta_unload_cmd_buf(cmd, psp->rap_context.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-
- kfree(cmd);
-
- return ret;
-}
-
static int psp_rap_initialize(struct psp_context *psp)
{
int ret;
+ enum ta_rap_status status = TA_RAP_STATUS__SUCCESS;
/*
* TODO: bypass the initialize in sriov for now
@@ -1568,34 +2240,38 @@ static int psp_rap_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->adev->psp.ta_rap_ucode_size ||
- !psp->adev->psp.ta_rap_start_addr) {
+ if (!psp->rap_context.context.bin_desc.size_bytes ||
+ !psp->rap_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "RAP: optional rap ta ucode is not available\n");
return 0;
}
- if (!psp->rap_context.rap_initialized) {
- ret = psp_rap_init_shared_buf(psp);
+ psp->rap_context.context.mem_context.shared_mem_size = PSP_RAP_SHARED_MEM_SIZE;
+ psp->rap_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->rap_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->rap_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_rap_load(psp);
- if (ret)
+ ret = psp_ta_load(psp, &psp->rap_context.context);
+ if (!ret) {
+ psp->rap_context.context.initialized = true;
+ mutex_init(&psp->rap_context.mutex);
+ } else
return ret;
- ret = psp_rap_invoke(psp, TA_CMD_RAP__INITIALIZE);
- if (ret != TA_RAP_STATUS__SUCCESS) {
- psp_rap_unload(psp);
-
- amdgpu_bo_free_kernel(&psp->rap_context.rap_shared_bo,
- &psp->rap_context.rap_shared_mc_addr,
- &psp->rap_context.rap_shared_buf);
+ ret = psp_rap_invoke(psp, TA_CMD_RAP__INITIALIZE, &status);
+ if (ret || status != TA_RAP_STATUS__SUCCESS) {
+ psp_rap_terminate(psp);
+ /* free rap shared memory */
+ psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
- psp->rap_context.rap_initialized = false;
+ dev_warn(psp->adev->dev, "RAP TA initialize fail (%d) status %d.\n",
+ ret, status);
- dev_warn(psp->adev->dev, "RAP TA initialize fail.\n");
- return -EINVAL;
+ return ret;
}
return 0;
@@ -1605,28 +2281,23 @@ static int psp_rap_terminate(struct psp_context *psp)
{
int ret;
- if (!psp->rap_context.rap_initialized)
+ if (!psp->rap_context.context.initialized)
return 0;
- ret = psp_rap_unload(psp);
-
- psp->rap_context.rap_initialized = false;
+ ret = psp_ta_unload(psp, &psp->rap_context.context);
- /* free rap shared memory */
- amdgpu_bo_free_kernel(&psp->rap_context.rap_shared_bo,
- &psp->rap_context.rap_shared_mc_addr,
- &psp->rap_context.rap_shared_buf);
+ psp->rap_context.context.initialized = false;
return ret;
}
-int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_status *status)
{
struct ta_rap_shared_memory *rap_cmd;
- int ret;
+ int ret = 0;
- if (!psp->rap_context.rap_initialized)
- return -EINVAL;
+ if (!psp->rap_context.context.initialized)
+ return 0;
if (ta_cmd_id != TA_CMD_RAP__INITIALIZE &&
ta_cmd_id != TA_CMD_RAP__VALIDATE_L0)
@@ -1635,96 +2306,31 @@ int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
mutex_lock(&psp->rap_context.mutex);
rap_cmd = (struct ta_rap_shared_memory *)
- psp->rap_context.rap_shared_buf;
+ psp->rap_context.context.mem_context.shared_buf;
memset(rap_cmd, 0, sizeof(struct ta_rap_shared_memory));
rap_cmd->cmd_id = ta_cmd_id;
rap_cmd->validation_method_id = METHOD_A;
- ret = psp_ta_invoke(psp, rap_cmd->cmd_id, psp->rap_context.session_id);
- if (ret) {
- mutex_unlock(&psp->rap_context.mutex);
- return ret;
- }
-
- mutex_unlock(&psp->rap_context.mutex);
-
- return rap_cmd->rap_status;
-}
-// RAP end
-
-/* securedisplay start */
-static int psp_securedisplay_init_shared_buf(struct psp_context *psp)
-{
- int ret;
-
- /*
- * Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for sa ta <-> Driver
- */
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_SECUREDISPLAY_SHARED_MEM_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &psp->securedisplay_context.securedisplay_shared_bo,
- &psp->securedisplay_context.securedisplay_shared_mc_addr,
- &psp->securedisplay_context.securedisplay_shared_buf);
-
- return ret;
-}
-
-static int psp_securedisplay_load(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
- memcpy(psp->fw_pri_buf, psp->ta_securedisplay_start_addr, psp->ta_securedisplay_ucode_size);
-
- psp_prep_ta_load_cmd_buf(cmd,
- psp->fw_pri_mc_addr,
- psp->ta_securedisplay_ucode_size,
- psp->securedisplay_context.securedisplay_shared_mc_addr,
- PSP_SECUREDISPLAY_SHARED_MEM_SIZE);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-
+ ret = psp_ta_invoke(psp, rap_cmd->cmd_id, &psp->rap_context.context);
if (ret)
- goto failed;
-
- psp->securedisplay_context.securedisplay_initialized = true;
- psp->securedisplay_context.session_id = cmd->resp.session_id;
- mutex_init(&psp->securedisplay_context.mutex);
-
-failed:
- kfree(cmd);
- return ret;
-}
+ goto out_unlock;
-static int psp_securedisplay_unload(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ta_unload_cmd_buf(cmd, psp->securedisplay_context.session_id);
+ if (status)
+ *status = rap_cmd->rap_status;
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-
- kfree(cmd);
+out_unlock:
+ mutex_unlock(&psp->rap_context.mutex);
return ret;
}
+// RAP end
+/* securedisplay start */
static int psp_securedisplay_initialize(struct psp_context *psp)
{
int ret;
- struct securedisplay_cmd *securedisplay_cmd;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
/*
* TODO: bypass the initialize in sriov for now
@@ -1732,35 +2338,51 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->adev->psp.ta_securedisplay_ucode_size ||
- !psp->adev->psp.ta_securedisplay_start_addr) {
- dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n");
+ /* bypass securedisplay initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
+ if (!psp->securedisplay_context.context.bin_desc.size_bytes ||
+ !psp->securedisplay_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev,
+ "SECUREDISPLAY: optional securedisplay ta ucode is not available\n");
return 0;
}
- if (!psp->securedisplay_context.securedisplay_initialized) {
- ret = psp_securedisplay_init_shared_buf(psp);
+ psp->securedisplay_context.context.mem_context.shared_mem_size =
+ PSP_SECUREDISPLAY_SHARED_MEM_SIZE;
+ psp->securedisplay_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->securedisplay_context.context.initialized) {
+ ret = psp_ta_init_shared_buf(psp,
+ &psp->securedisplay_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_securedisplay_load(psp);
- if (ret)
+ ret = psp_ta_load(psp, &psp->securedisplay_context.context);
+ if (!ret && !psp->securedisplay_context.context.resp_status) {
+ psp->securedisplay_context.context.initialized = true;
+ mutex_init(&psp->securedisplay_context.mutex);
+ } else {
+ /* don't try again */
+ psp->securedisplay_context.context.bin_desc.size_bytes = 0;
return ret;
+ }
+
+ mutex_lock(&psp->securedisplay_context.mutex);
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
TA_SECUREDISPLAY_COMMAND__QUERY_TA);
ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
- if (ret) {
- psp_securedisplay_unload(psp);
-
- amdgpu_bo_free_kernel(&psp->securedisplay_context.securedisplay_shared_bo,
- &psp->securedisplay_context.securedisplay_shared_mc_addr,
- &psp->securedisplay_context.securedisplay_shared_buf);
- psp->securedisplay_context.securedisplay_initialized = false;
+ mutex_unlock(&psp->securedisplay_context.mutex);
+ if (ret) {
+ psp_securedisplay_terminate(psp);
+ /* free securedisplay shared memory */
+ psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
dev_err(psp->adev->dev, "SECUREDISPLAY TA initialize fail.\n");
return -EINVAL;
}
@@ -1769,6 +2391,8 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
dev_err(psp->adev->dev, "SECUREDISPLAY: query securedisplay TA failed. ret 0x%x\n",
securedisplay_cmd->securedisplay_out_message.query_ta.query_cmd_ret);
+ /* don't try again */
+ psp->securedisplay_context.context.bin_desc.size_bytes = 0;
}
return 0;
@@ -1784,19 +2408,12 @@ static int psp_securedisplay_terminate(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->securedisplay_context.securedisplay_initialized)
+ if (!psp->securedisplay_context.context.initialized)
return 0;
- ret = psp_securedisplay_unload(psp);
- if (ret)
- return ret;
-
- psp->securedisplay_context.securedisplay_initialized = false;
+ ret = psp_ta_unload(psp, &psp->securedisplay_context.context);
- /* free securedisplay shared memory */
- amdgpu_bo_free_kernel(&psp->securedisplay_context.securedisplay_shared_bo,
- &psp->securedisplay_context.securedisplay_shared_mc_addr,
- &psp->securedisplay_context.securedisplay_shared_buf);
+ psp->securedisplay_context.context.initialized = false;
return ret;
}
@@ -1805,77 +2422,193 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
int ret;
- if (!psp->securedisplay_context.securedisplay_initialized)
+ if (!psp->securedisplay_context.context.initialized)
return -EINVAL;
if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA &&
- ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC)
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC &&
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2)
return -EINVAL;
- mutex_lock(&psp->securedisplay_context.mutex);
+ ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context);
- ret = psp_ta_invoke(psp, ta_cmd_id, psp->securedisplay_context.session_id);
+ return ret;
+}
+/* SECUREDISPLAY end */
- mutex_unlock(&psp->securedisplay_context.mutex);
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+ int ret = 0;
+
+ if (!amdgpu_sriov_vf(adev) && psp->funcs && psp->funcs->wait_for_bootloader != NULL)
+ ret = psp->funcs->wait_for_bootloader(psp);
return ret;
}
-/* SECUREDISPLAY end */
+
+bool amdgpu_psp_get_ras_capability(struct psp_context *psp)
+{
+ if (psp->funcs &&
+ psp->funcs->get_ras_capability) {
+ return psp->funcs->get_ras_capability(psp);
+ } else {
+ return false;
+ }
+}
+
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return false;
+
+ if (psp->funcs && psp->funcs->is_reload_needed)
+ return psp->funcs->is_reload_needed(psp);
+
+ return false;
+}
+
+static void psp_update_gpu_addresses(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (psp->cmd_buf_bo && psp->cmd_buf_mem) {
+ psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo);
+ psp->fence_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->fence_buf_bo);
+ psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo);
+ }
+ if (adev->firmware.rbuf && psp->km_ring.ring_mem)
+ psp->km_ring.ring_mem_mc_addr = amdgpu_bo_fb_aper_addr(adev->firmware.rbuf);
+}
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
int ret;
+ if (amdgpu_virt_xgmi_migrate_enabled(adev))
+ psp_update_gpu_addresses(adev);
+
if (!amdgpu_sriov_vf(adev)) {
- if (psp->kdb_bin_size &&
+ if ((is_psp_fw_valid(psp->kdb)) &&
(psp->funcs->bootloader_load_kdb != NULL)) {
ret = psp_bootloader_load_kdb(psp);
if (ret) {
- DRM_ERROR("PSP load kdb failed!\n");
+ dev_err(adev->dev, "PSP load kdb failed!\n");
return ret;
}
}
- if (psp->spl_bin_size) {
+ if ((is_psp_fw_valid(psp->spl)) &&
+ (psp->funcs->bootloader_load_spl != NULL)) {
ret = psp_bootloader_load_spl(psp);
if (ret) {
- DRM_ERROR("PSP load spl failed!\n");
+ dev_err(adev->dev, "PSP load spl failed!\n");
return ret;
}
}
- ret = psp_bootloader_load_sysdrv(psp);
- if (ret) {
- DRM_ERROR("PSP load sysdrv failed!\n");
- return ret;
+ if ((is_psp_fw_valid(psp->sys)) &&
+ (psp->funcs->bootloader_load_sysdrv != NULL)) {
+ ret = psp_bootloader_load_sysdrv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load sys drv failed!\n");
+ return ret;
+ }
}
- ret = psp_bootloader_load_sos(psp);
- if (ret) {
- DRM_ERROR("PSP load sos failed!\n");
- return ret;
+ if ((is_psp_fw_valid(psp->soc_drv)) &&
+ (psp->funcs->bootloader_load_soc_drv != NULL)) {
+ ret = psp_bootloader_load_soc_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load soc drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->intf_drv)) &&
+ (psp->funcs->bootloader_load_intf_drv != NULL)) {
+ ret = psp_bootloader_load_intf_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load intf drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->dbg_drv)) &&
+ (psp->funcs->bootloader_load_dbg_drv != NULL)) {
+ ret = psp_bootloader_load_dbg_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load dbg drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->ras_drv)) &&
+ (psp->funcs->bootloader_load_ras_drv != NULL)) {
+ ret = psp_bootloader_load_ras_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load ras_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->ipkeymgr_drv)) &&
+ (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) {
+ ret = psp_bootloader_load_ipkeymgr_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->spdm_drv)) &&
+ (psp->funcs->bootloader_load_spdm_drv != NULL)) {
+ ret = psp_bootloader_load_spdm_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load spdm_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->sos)) &&
+ (psp->funcs->bootloader_load_sos != NULL)) {
+ ret = psp_bootloader_load_sos(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load sos failed!\n");
+ return ret;
+ }
}
}
ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
if (ret) {
- DRM_ERROR("PSP create ring failed!\n");
+ dev_err(adev->dev, "PSP create ring failed!\n");
return ret;
}
- ret = psp_clear_vf_fw(psp);
- if (ret) {
- DRM_ERROR("PSP clear vf fw!\n");
- return ret;
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ ret = psp_update_fw_reservation(psp);
+ if (ret) {
+ dev_err(adev->dev, "update fw reservation failed!\n");
+ return ret;
+ }
}
- ret = psp_tmr_init(psp);
- if (ret) {
- DRM_ERROR("PSP tmr init failed!\n");
- return ret;
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+ goto skip_pin_bo;
+
+ if (!psp->boot_time_tmr || psp->autoload_supported) {
+ ret = psp_tmr_init(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP tmr init failed!\n");
+ return ret;
+ }
}
+skip_pin_bo:
/*
* For ASICs with DF Cstate management centralized
* to PMFW, TMR setup should be performed after PMFW
@@ -1887,10 +2620,12 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
- ret = psp_tmr_load(psp);
- if (ret) {
- DRM_ERROR("PSP load tmr failed!\n");
- return ret;
+ if (!psp->boot_time_tmr || !psp->autoload_supported) {
+ ret = psp_tmr_load(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load tmr failed!\n");
+ return ret;
+ }
}
return 0;
@@ -1900,6 +2635,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
enum psp_gfx_fw_type *type)
{
switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_CAP:
+ *type = GFX_FW_TYPE_CAP;
+ break;
case AMDGPU_UCODE_ID_SDMA0:
*type = GFX_FW_TYPE_SDMA0;
break;
@@ -1930,6 +2668,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_CP_MES_DATA:
*type = GFX_FW_TYPE_MES_STACK;
break;
+ case AMDGPU_UCODE_ID_CP_MES1:
+ *type = GFX_FW_TYPE_CP_MES_KIQ;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ *type = GFX_FW_TYPE_MES_KIQ_STACK;
+ break;
case AMDGPU_UCODE_ID_CP_CE:
*type = GFX_FW_TYPE_CP_CE;
break;
@@ -1951,6 +2695,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_CP_MEC2_JT:
*type = GFX_FW_TYPE_CP_MEC_ME2;
break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ *type = GFX_FW_TYPE_RLC_P;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ *type = GFX_FW_TYPE_RLC_V;
+ break;
case AMDGPU_UCODE_ID_RLC_G:
*type = GFX_FW_TYPE_RLC_G;
break;
@@ -1969,9 +2719,27 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_RLC_DRAM:
*type = GFX_FW_TYPE_RLC_DRAM_BOOT;
break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE0_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE1_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE2_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE3_TAP_DELAYS;
+ break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ *type = GFX_FW_TYPE_PPTABLE;
+ break;
case AMDGPU_UCODE_ID_UVD:
*type = GFX_FW_TYPE_UVD;
break;
@@ -2002,6 +2770,79 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_DMCUB:
*type = GFX_FW_TYPE_DMUB;
break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH1;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ *type = GFX_FW_TYPE_IMU_I;
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ *type = GFX_FW_TYPE_IMU_D;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ *type = GFX_FW_TYPE_RS64_PFP;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ *type = GFX_FW_TYPE_RS64_ME;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ *type = GFX_FW_TYPE_RS64_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_PFP_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_PFP_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_ME_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_ME_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P2_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P3_STACK;
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ *type = GFX_FW_TYPE_VPEC_FW1;
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ *type = GFX_FW_TYPE_VPEC_FW2;
+ break;
+ case AMDGPU_UCODE_ID_VPE:
+ *type = GFX_FW_TYPE_VPE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ *type = GFX_FW_TYPE_UMSCH_UCODE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ *type = GFX_FW_TYPE_UMSCH_DATA;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ *type = GFX_FW_TYPE_UMSCH_CMD_BUFFER;
+ break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ *type = GFX_FW_TYPE_P2S_TABLE;
+ break;
+ case AMDGPU_UCODE_ID_JPEG_RAM:
+ *type = GFX_FW_TYPE_JPEG_RAM;
+ break;
+ case AMDGPU_UCODE_ID_ISP:
+ *type = GFX_FW_TYPE_ISP;
+ break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
@@ -2058,14 +2899,13 @@ static void psp_print_fw_hdr(struct psp_context *psp,
}
}
-static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
+static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd)
{
int ret;
uint64_t fw_mem_mc_addr = ucode->mc_addr;
- memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
-
cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
@@ -2073,22 +2913,51 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
if (ret)
- DRM_ERROR("Unknown firmware type\n");
+ dev_err(psp->adev->dev, "Unknown firmware type\n");
return ret;
}
-static int psp_execute_np_fw_load(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode)
+int psp_execute_ip_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
{
int ret = 0;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
- ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd);
- if (ret)
- return ret;
+ ret = psp_prep_load_ip_fw_cmd_buf(psp, ucode, cmd);
+ if (!ret) {
+ ret = psp_cmd_submit_buf(psp, ucode, cmd,
+ psp->fence_buf_mc_addr);
+ }
- ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
- psp->fence_buf_mc_addr);
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_load_p2s_table(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_device *adev = psp->adev;
+ struct amdgpu_firmware_info *ucode =
+ &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE];
+
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
+ return 0;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+ uint32_t supp_vers = adev->flags & AMD_IS_APU ? 0x0036013D :
+ 0x0036003C;
+ if (psp->sos.fw_version < supp_vers)
+ return 0;
+ }
+
+ if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ ret = psp_execute_ip_fw_load(psp, ucode);
return ret;
}
@@ -2099,24 +2968,31 @@ static int psp_load_smu_fw(struct psp_context *psp)
struct amdgpu_device *adev = psp->adev;
struct amdgpu_firmware_info *ucode =
&adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
- struct amdgpu_ras *ras = psp->ras.ras;
+ struct amdgpu_ras *ras = psp->ras_context.ras;
- if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
+ /*
+ * Skip SMU FW reloading in case of using BACO for runpm only,
+ * as SMU is always alive.
+ */
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
return 0;
+ if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
+ return 0;
- if (amdgpu_in_reset(adev) && ras && ras->supported &&
- adev->asic_type == CHIP_ARCTURUS) {
+ if ((amdgpu_in_reset(adev) && ras && adev->ras_enabled &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) {
ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
- if (ret) {
- DRM_WARN("Failed to set MP1 state prepare for reload\n");
- }
+ if (ret)
+ dev_err(adev->dev, "Failed to set MP1 state prepare for reload\n");
}
- ret = psp_execute_np_fw_load(psp, ucode);
+ ret = psp_execute_ip_fw_load(psp, ucode);
if (ret)
- DRM_ERROR("PSP load smu failed!\n");
+ dev_err(adev->dev, "PSP load smu failed!\n");
return ret;
}
@@ -2124,7 +3000,10 @@ static int psp_load_smu_fw(struct psp_context *psp)
static bool fw_load_skip_check(struct psp_context *psp,
struct amdgpu_firmware_info *ucode)
{
- if (!ucode->fw)
+ if (!ucode->fw || !ucode->ucode_size)
+ return true;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_P2S_TABLE)
return true;
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
@@ -2134,20 +3013,7 @@ static bool fw_load_skip_check(struct psp_context *psp,
return true;
if (amdgpu_sriov_vf(psp->adev) &&
- (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
- || ucode->ucode_id == AMDGPU_UCODE_ID_SMC))
- /*skip ucode loading in SRIOV VF */
+ amdgpu_virt_fw_load_skip_check(psp->adev, ucode->ucode_id))
return true;
if (psp->autoload_supported &&
@@ -2159,7 +3025,23 @@ static bool fw_load_skip_check(struct psp_context *psp,
return false;
}
-static int psp_np_fw_load(struct psp_context *psp)
+int psp_load_fw_list(struct psp_context *psp,
+ struct amdgpu_firmware_info **ucode_list, int ucode_count)
+{
+ int ret = 0, i;
+ struct amdgpu_firmware_info *ucode;
+
+ for (i = 0; i < ucode_count; ++i) {
+ ucode = ucode_list[i];
+ psp_print_fw_hdr(psp, ucode);
+ ret = psp_execute_ip_fw_load(psp, ucode);
+ if (ret)
+ return ret;
+ }
+ return ret;
+}
+
+static int psp_load_non_psp_fw(struct psp_context *psp)
{
int i, ret;
struct amdgpu_firmware_info *ucode;
@@ -2172,6 +3054,9 @@ static int psp_np_fw_load(struct psp_context *psp)
return ret;
}
+ /* Load P2S table first if it's available */
+ psp_load_p2s_table(psp);
+
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
@@ -2187,27 +3072,32 @@ static int psp_np_fw_load(struct psp_context *psp)
continue;
if (psp->autoload_supported &&
- (adev->asic_type >= CHIP_SIENNA_CICHLID &&
- adev->asic_type <= CHIP_DIMGREY_CAVEFISH) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 7) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 11) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 12)) &&
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
/* PSP only receive one SDMA fw for sienna_cichlid,
- * as all four sdma fw are same */
+ * as all four sdma fw are same
+ */
continue;
psp_print_fw_hdr(psp, ucode);
- ret = psp_execute_np_fw_load(psp, ucode);
+ ret = psp_execute_ip_fw_load(psp, ucode);
if (ret)
return ret;
- /* Start rlc autoload after psp recieved all the gfx firmware */
+ /* Start rlc autoload after psp received all the gfx firmware */
if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
- AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) {
+ adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) {
ret = psp_rlc_autoload_start(psp);
if (ret) {
- DRM_ERROR("Failed to start rlc autoload\n");
+ dev_err(adev->dev, "Failed to start rlc autoload\n");
return ret;
}
}
@@ -2222,65 +3112,55 @@ static int psp_load_fw(struct amdgpu_device *adev)
struct psp_context *psp = &adev->psp;
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
- psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
- goto skip_memalloc;
- }
-
- psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!psp->cmd)
- return -ENOMEM;
+ /* should not destroy ring, only stop */
+ psp_ring_stop(psp, PSP_RING_TYPE__KM);
+ } else {
+ memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
- ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- AMDGPU_GEM_DOMAIN_GTT,
- &psp->fw_pri_bo,
- &psp->fw_pri_mc_addr,
- &psp->fw_pri_buf);
- if (ret)
- goto failed;
+ ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+ if (ret) {
+ dev_err(adev->dev, "PSP ring init failed!\n");
+ goto failed;
+ }
+ }
- ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->fence_buf_bo,
- &psp->fence_buf_mc_addr,
- &psp->fence_buf);
+ ret = psp_hw_start(psp);
if (ret)
goto failed;
- ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
- (void **)&psp->cmd_buf_mem);
+ ret = psp_load_non_psp_fw(psp);
if (ret)
- goto failed;
-
- memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+ goto failed1;
- ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+ ret = psp_asd_initialize(psp);
if (ret) {
- DRM_ERROR("PSP ring init failed!\n");
- goto failed;
+ dev_err(adev->dev, "PSP load asd failed!\n");
+ goto failed1;
}
-skip_memalloc:
- ret = psp_hw_start(psp);
- if (ret)
- goto failed;
-
- ret = psp_np_fw_load(psp);
- if (ret)
- goto failed;
-
- ret = psp_asd_load(psp);
+ ret = psp_rl_load(adev);
if (ret) {
- DRM_ERROR("PSP load asd failed!\n");
- return ret;
+ dev_err(adev->dev, "PSP load RL failed!\n");
+ goto failed1;
}
- if (psp->adev->psp.ta_fw) {
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp, false, true);
+ /* Warning the XGMI seesion initialize failure
+ * Instead of stop driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+ }
+
+ if (psp->ta_fw) {
ret = psp_ras_initialize(psp);
if (ret)
dev_err(psp->adev->dev,
- "RAS: Failed to initialize RAS\n");
+ "RAS: Failed to initialize RAS\n");
ret = psp_hdcp_initialize(psp);
if (ret)
@@ -2305,32 +3185,32 @@ skip_memalloc:
return 0;
+failed1:
+ psp_free_shared_bufs(psp);
failed:
/*
* all cleanup jobs (xgmi terminate, ras terminate,
* ring destroy, cmd/fence/fw buffers destory,
* psp->cmd destory) are delayed to psp_hw_fini
*/
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
return ret;
}
-static int psp_hw_init(void *handle)
+static int psp_hw_init(struct amdgpu_ip_block *ip_block)
{
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
mutex_lock(&adev->firmware.mutex);
- /*
- * This sequence is just used on hw_init only once, no need on
- * resume.
- */
+
ret = amdgpu_ucode_init_bo(adev);
if (ret)
goto failed;
ret = psp_load_fw(adev);
if (ret) {
- DRM_ERROR("PSP firmware loading failed\n");
+ dev_err(adev->dev, "PSP firmware loading failed\n");
goto failed;
}
@@ -2343,139 +3223,137 @@ failed:
return -EINVAL;
}
-static int psp_hw_fini(void *handle)
+static int psp_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- int ret;
- if (psp->adev->psp.ta_fw) {
+ if (psp->ta_fw) {
psp_ras_terminate(psp);
psp_securedisplay_terminate(psp);
psp_rap_terminate(psp);
psp_dtm_terminate(psp);
psp_hdcp_terminate(psp);
- }
- psp_asd_unload(psp);
- ret = psp_clear_vf_fw(psp);
- if (ret) {
- DRM_ERROR("PSP clear vf fw!\n");
- return ret;
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ psp_xgmi_terminate(psp);
}
+ psp_asd_terminate(psp);
psp_tmr_terminate(psp);
- psp_ring_destroy(psp, PSP_RING_TYPE__KM);
- amdgpu_bo_free_kernel(&psp->fw_pri_bo,
- &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
- amdgpu_bo_free_kernel(&psp->fence_buf_bo,
- &psp->fence_buf_mc_addr, &psp->fence_buf);
- amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
- (void **)&psp->cmd_buf_mem);
-
- kfree(psp->cmd);
- psp->cmd = NULL;
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
return 0;
}
-static int psp_suspend(void *handle)
+static int psp_suspend(struct amdgpu_ip_block *ip_block)
{
- int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
- psp->xgmi_context.initialized == 1) {
+ psp->xgmi_context.context.initialized) {
ret = psp_xgmi_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate xgmi ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate xgmi ta\n");
+ goto out;
}
}
- if (psp->adev->psp.ta_fw) {
+ if (psp->ta_fw) {
ret = psp_ras_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate ras ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate ras ta\n");
+ goto out;
}
ret = psp_hdcp_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate hdcp ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate hdcp ta\n");
+ goto out;
}
ret = psp_dtm_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate dtm ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate dtm ta\n");
+ goto out;
}
ret = psp_rap_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate rap ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate rap ta\n");
+ goto out;
}
ret = psp_securedisplay_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate securedisplay ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate securedisplay ta\n");
+ goto out;
}
}
- ret = psp_asd_unload(psp);
+ ret = psp_asd_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to unload asd\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate asd\n");
+ goto out;
}
ret = psp_tmr_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate tmr\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate tmr\n");
+ goto out;
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
- if (ret) {
- DRM_ERROR("PSP ring stop failed\n");
- return ret;
- }
+ if (ret)
+ dev_err(adev->dev, "PSP ring stop failed\n");
- return 0;
+out:
+ return ret;
}
-static int psp_resume(void *handle)
+static int psp_resume(struct amdgpu_ip_block *ip_block)
{
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- DRM_INFO("PSP is resuming...\n");
+ dev_info(adev->dev, "PSP is resuming...\n");
- ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
- if (ret) {
- DRM_ERROR("Failed to process memory training!\n");
- return ret;
+ if (psp->mem_train_ctx.enable_mem_training) {
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
+ if (ret) {
+ dev_err(adev->dev, "Failed to process memory training!\n");
+ return ret;
+ }
}
mutex_lock(&adev->firmware.mutex);
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
ret = psp_hw_start(psp);
if (ret)
goto failed;
- ret = psp_np_fw_load(psp);
+ ret = psp_load_non_psp_fw(psp);
if (ret)
goto failed;
- ret = psp_asd_load(psp);
+ ret = psp_asd_initialize(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load asd failed!\n");
+ goto failed;
+ }
+
+ ret = psp_rl_load(adev);
if (ret) {
- DRM_ERROR("PSP load asd failed!\n");
+ dev_err(adev->dev, "PSP load RL failed!\n");
goto failed;
}
if (adev->gmc.xgmi.num_physical_nodes > 1) {
- ret = psp_xgmi_initialize(psp);
+ ret = psp_xgmi_initialize(psp, false, true);
/* Warning the XGMI seesion initialize failure
* Instead of stop driver initialization
*/
@@ -2484,11 +3362,11 @@ static int psp_resume(void *handle)
"XGMI: Failed to initialize XGMI session\n");
}
- if (psp->adev->psp.ta_fw) {
+ if (psp->ta_fw) {
ret = psp_ras_initialize(psp);
if (ret)
dev_err(psp->adev->dev,
- "RAS: Failed to initialize RAS\n");
+ "RAS: Failed to initialize RAS\n");
ret = psp_hdcp_initialize(psp);
if (ret)
@@ -2516,7 +3394,7 @@ static int psp_resume(void *handle)
return 0;
failed:
- DRM_ERROR("PSP resume failed\n");
+ dev_err(adev->dev, "PSP resume failed\n");
mutex_unlock(&adev->firmware.mutex);
return ret;
}
@@ -2538,31 +3416,16 @@ int psp_gpu_reset(struct amdgpu_device *adev)
int psp_rlc_autoload_start(struct psp_context *psp)
{
int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
cmd->cmd_id = GFX_CMD_ID_AUTOLOAD_RLC;
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- kfree(cmd);
- return ret;
-}
-
-int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
- uint64_t cmd_gpu_addr, int cmd_size)
-{
- struct amdgpu_firmware_info ucode = {0};
- ucode.ucode_id = inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
- AMDGPU_UCODE_ID_VCN0_RAM;
- ucode.mc_addr = cmd_gpu_addr;
- ucode.ucode_size = cmd_size;
+ release_psp_cmd_buf(psp);
- return psp_execute_np_fw_load(&adev->psp, &ucode);
+ return ret;
}
int psp_ring_cmd_submit(struct psp_context *psp,
@@ -2592,9 +3455,11 @@ int psp_ring_cmd_submit(struct psp_context *psp,
write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
/* Check invalid write_frame ptr address */
if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
+ dev_err(adev->dev,
+ "ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+ ring_buffer_start, ring_buffer_end, write_frame);
+ dev_err(adev->dev,
+ "write_frame is pointing to address out of bounds\n");
return -EINVAL;
}
@@ -2607,7 +3472,7 @@ int psp_ring_cmd_submit(struct psp_context *psp,
write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
write_frame->fence_value = index;
- amdgpu_asic_flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
/* Update the write Pointer in DWORDs */
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
@@ -2615,142 +3480,286 @@ int psp_ring_cmd_submit(struct psp_context *psp,
return 0;
}
-int psp_init_asd_microcode(struct psp_context *psp,
- const char *chip_name)
+int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *asd_hdr;
int err = 0;
- if (!chip_name) {
- dev_err(adev->dev, "invalid chip name for asd microcode\n");
- return -EINVAL;
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
- err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = amdgpu_ucode_validate(adev->psp.asd_fw);
+ err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_asd.bin", chip_name);
if (err)
goto out;
asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
- adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
- adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version);
- adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
- adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
+ adev->psp.asd_context.bin_desc.fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
+ adev->psp.asd_context.bin_desc.feature_version = le32_to_cpu(asd_hdr->sos.fw_version);
+ adev->psp.asd_context.bin_desc.size_bytes = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
+ adev->psp.asd_context.bin_desc.start_addr = (uint8_t *)asd_hdr +
le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
return 0;
out:
- dev_err(adev->dev, "fail to initialize asd microcode\n");
- release_firmware(adev->psp.asd_fw);
- adev->psp.asd_fw = NULL;
+ amdgpu_ucode_release(&adev->psp.asd_fw);
return err;
}
-int psp_init_toc_microcode(struct psp_context *psp,
- const char *chip_name)
+int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[30];
const struct psp_firmware_header_v1_0 *toc_hdr;
int err = 0;
- if (!chip_name) {
- dev_err(adev->dev, "invalid chip name for toc microcode\n");
- return -EINVAL;
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", chip_name);
- err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = amdgpu_ucode_validate(adev->psp.toc_fw);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", chip_name);
if (err)
goto out;
toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
- adev->psp.toc_fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
- adev->psp.toc_feature_version = le32_to_cpu(toc_hdr->ucode_feature_version);
- adev->psp.toc_bin_size = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
- adev->psp.toc_start_addr = (uint8_t *)toc_hdr +
+ adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+ adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+ adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
return 0;
out:
- dev_err(adev->dev, "fail to request/validate toc microcode\n");
- release_firmware(adev->psp.toc_fw);
- adev->psp.toc_fw = NULL;
+ amdgpu_ucode_release(&adev->psp.toc_fw);
return err;
}
-int psp_init_sos_microcode(struct psp_context *psp,
- const char *chip_name)
+static int parse_sos_bin_descriptor(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc,
+ const struct psp_firmware_header_v2_0 *sos_hdr)
+{
+ uint8_t *ucode_start_addr = NULL;
+
+ if (!psp || !desc || !sos_hdr)
+ return -EINVAL;
+
+ ucode_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(desc->offset_bytes) +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+
+ switch (desc->fw_type) {
+ case PSP_FW_TYPE_PSP_SOS:
+ psp->sos.fw_version = le32_to_cpu(desc->fw_version);
+ psp->sos.feature_version = le32_to_cpu(desc->fw_version);
+ psp->sos.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->sos.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SYS_DRV:
+ psp->sys.fw_version = le32_to_cpu(desc->fw_version);
+ psp->sys.feature_version = le32_to_cpu(desc->fw_version);
+ psp->sys.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->sys.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_KDB:
+ psp->kdb.fw_version = le32_to_cpu(desc->fw_version);
+ psp->kdb.feature_version = le32_to_cpu(desc->fw_version);
+ psp->kdb.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->kdb.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_TOC:
+ psp->toc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->toc.feature_version = le32_to_cpu(desc->fw_version);
+ psp->toc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->toc.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SPL:
+ psp->spl.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spl.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spl.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spl.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_RL:
+ psp->rl.fw_version = le32_to_cpu(desc->fw_version);
+ psp->rl.feature_version = le32_to_cpu(desc->fw_version);
+ psp->rl.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->rl.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SOC_DRV:
+ psp->soc_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->soc_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->soc_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->soc_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_INTF_DRV:
+ psp->intf_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->intf_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->intf_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->intf_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_DBG_DRV:
+ psp->dbg_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->dbg_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->dbg_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ psp->ras_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ras_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_IPKEYMGR_DRV:
+ psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ipkeymgr_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SPDM_DRV:
+ psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spdm_drv.start_addr = ucode_start_addr;
+ break;
+ default:
+ dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
+ break;
+ }
+
+ return 0;
+}
+
+static int psp_init_sos_base_fw(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *sos_hdr;
- const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
- const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
- int err = 0;
+ uint8_t *ucode_array_start_addr;
+
+ sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
+ ucode_array_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2))) {
+ adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
+ adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version);
+
+ adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr->sos.offset_bytes);
+ adev->psp.sys.start_addr = ucode_array_start_addr;
+
+ adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr->sos.size_bytes);
+ adev->psp.sos.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr->sos.offset_bytes);
+ } else {
+ /* Load alternate PSP SOS FW */
+ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
+
+ adev->psp.sos.fw_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version);
+ adev->psp.sos.feature_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version);
+
+ adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.size_bytes);
+ adev->psp.sys.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.offset_bytes);
+
+ adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes);
+ adev->psp.sos.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes);
+ }
- if (!chip_name) {
- dev_err(adev->dev, "invalid chip name for sos microcode\n");
+ if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes == 0)) {
+ dev_warn(adev->dev, "PSP SOS FW not available");
return -EINVAL;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
- err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
- if (err)
- goto out;
+ return 0;
+}
+
+int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const struct psp_firmware_header_v1_0 *sos_hdr;
+ const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
+ const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
+ const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
+ const struct psp_firmware_header_v2_0 *sos_hdr_v2_0;
+ const struct psp_firmware_header_v2_1 *sos_hdr_v2_1;
+ int fw_index, fw_bin_count, start_index = 0;
+ const struct psp_fw_bin_desc *fw_bin;
+ uint8_t *ucode_array_start_addr;
+ int err = 0;
- err = amdgpu_ucode_validate(adev->psp.sos_fw);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos.bin", chip_name);
if (err)
goto out;
sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
+ ucode_array_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
amdgpu_ucode_print_psp_hdr(&sos_hdr->header);
switch (sos_hdr->header.header_version_major) {
case 1:
- adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
- adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
- adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
- adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos_offset_bytes);
- adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
- le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
- adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr->sos_offset_bytes);
+ err = psp_init_sos_base_fw(adev);
+ if (err)
+ goto out;
+
if (sos_hdr->header.header_version_minor == 1) {
sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data;
- adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc_size_bytes);
- adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_1->toc_offset_bytes);
- adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_1->kdb_size_bytes);
- adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes);
+ adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_1->toc.size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_1->toc.offset_bytes);
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_1->kdb.size_bytes);
+ adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_1->kdb.offset_bytes);
}
if (sos_hdr->header.header_version_minor == 2) {
sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data;
- adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes);
- adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes);
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_2->kdb.size_bytes);
+ adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_2->kdb.offset_bytes);
}
if (sos_hdr->header.header_version_minor == 3) {
sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
- adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.toc_size_bytes);
- adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_3->v1_1.toc_offset_bytes);
- adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_size_bytes);
- adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_offset_bytes);
- adev->psp.spl_bin_size = le32_to_cpu(sos_hdr_v1_3->spl_size_bytes);
- adev->psp.spl_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_3->spl_offset_bytes);
+ adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.toc.size_bytes);
+ adev->psp.toc.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->v1_1.toc.offset_bytes);
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.size_bytes);
+ adev->psp.kdb.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.offset_bytes);
+ adev->psp.spl.size_bytes = le32_to_cpu(sos_hdr_v1_3->spl.size_bytes);
+ adev->psp.spl.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->spl.offset_bytes);
+ adev->psp.rl.size_bytes = le32_to_cpu(sos_hdr_v1_3->rl.size_bytes);
+ adev->psp.rl.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->rl.offset_bytes);
+ }
+ break;
+ case 2:
+ sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data;
+
+ fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count);
+
+ if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) {
+ dev_err(adev->dev, "packed SOS count exceeds maximum limit\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (sos_hdr_v2_0->header.header_version_minor == 1) {
+ sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data;
+
+ fw_bin = sos_hdr_v2_1->psp_fw_bin;
+
+ if (psp_is_aux_sos_load_required(psp))
+ start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+ else
+ fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+
+ } else {
+ fw_bin = sos_hdr_v2_0->psp_fw_bin;
+ }
+
+ for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) {
+ err = parse_sos_bin_descriptor(psp, fw_bin + fw_index,
+ sos_hdr_v2_0);
+ if (err)
+ goto out;
}
break;
default:
@@ -2762,16 +3771,43 @@ int psp_init_sos_microcode(struct psp_context *psp,
return 0;
out:
- dev_err(adev->dev,
- "failed to init sos firmware\n");
- release_firmware(adev->psp.sos_fw);
- adev->psp.sos_fw = NULL;
+ amdgpu_ucode_release(&adev->psp.sos_fw);
return err;
}
+static bool is_ta_fw_applicable(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t fw_version;
+
+ switch (desc->fw_type) {
+ case TA_FW_TYPE_PSP_XGMI:
+ case TA_FW_TYPE_PSP_XGMI_AUX:
+ /* for now, AUX TA only exists on 13.0.6 ta bin,
+ * from v20.00.0x.14
+ */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 6)) {
+ fw_version = le32_to_cpu(desc->fw_version);
+
+ if (adev->flags & AMD_IS_APU &&
+ (fw_version & 0xff) >= 0x14)
+ return desc->fw_type == TA_FW_TYPE_PSP_XGMI_AUX;
+ else
+ return desc->fw_type == TA_FW_TYPE_PSP_XGMI;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return true;
+}
+
static int parse_ta_bin_descriptor(struct psp_context *psp,
- const struct ta_fw_bin_desc *desc,
+ const struct psp_fw_bin_desc *desc,
const struct ta_firmware_header_v2_0 *ta_hdr)
{
uint8_t *ucode_start_addr = NULL;
@@ -2779,46 +3815,53 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
if (!psp || !desc || !ta_hdr)
return -EINVAL;
+ if (!is_ta_fw_applicable(psp, desc))
+ return 0;
+
ucode_start_addr = (uint8_t *)ta_hdr +
le32_to_cpu(desc->offset_bytes) +
le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
switch (desc->fw_type) {
case TA_FW_TYPE_PSP_ASD:
- psp->asd_fw_version = le32_to_cpu(desc->fw_version);
- psp->asd_feature_version = le32_to_cpu(desc->fw_version);
- psp->asd_ucode_size = le32_to_cpu(desc->size_bytes);
- psp->asd_start_addr = ucode_start_addr;
+ psp->asd_context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->asd_context.bin_desc.feature_version = le32_to_cpu(desc->fw_version);
+ psp->asd_context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->asd_context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_XGMI:
- psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version);
- psp->ta_xgmi_ucode_size = le32_to_cpu(desc->size_bytes);
- psp->ta_xgmi_start_addr = ucode_start_addr;
+ case TA_FW_TYPE_PSP_XGMI_AUX:
+ psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_RAS:
- psp->ta_ras_ucode_version = le32_to_cpu(desc->fw_version);
- psp->ta_ras_ucode_size = le32_to_cpu(desc->size_bytes);
- psp->ta_ras_start_addr = ucode_start_addr;
+ psp->ras_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ras_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ras_context.context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_HDCP:
- psp->ta_hdcp_ucode_version = le32_to_cpu(desc->fw_version);
- psp->ta_hdcp_ucode_size = le32_to_cpu(desc->size_bytes);
- psp->ta_hdcp_start_addr = ucode_start_addr;
+ psp->hdcp_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->hdcp_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->hdcp_context.context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_DTM:
- psp->ta_dtm_ucode_version = le32_to_cpu(desc->fw_version);
- psp->ta_dtm_ucode_size = le32_to_cpu(desc->size_bytes);
- psp->ta_dtm_start_addr = ucode_start_addr;
+ psp->dtm_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->dtm_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->dtm_context.context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_RAP:
- psp->ta_rap_ucode_version = le32_to_cpu(desc->fw_version);
- psp->ta_rap_ucode_size = le32_to_cpu(desc->size_bytes);
- psp->ta_rap_start_addr = ucode_start_addr;
+ psp->rap_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->rap_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->rap_context.context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_SECUREDISPLAY:
- psp->ta_securedisplay_ucode_version = le32_to_cpu(desc->fw_version);
- psp->ta_securedisplay_ucode_size = le32_to_cpu(desc->size_bytes);
- psp->ta_securedisplay_start_addr = ucode_start_addr;
+ psp->securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(desc->fw_version);
+ psp->securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(desc->size_bytes);
+ psp->securedisplay_context.context.bin_desc.start_addr =
+ ucode_start_addr;
break;
default:
dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type);
@@ -2828,41 +3871,76 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
return 0;
}
-int psp_init_ta_microcode(struct psp_context *psp,
- const char *chip_name)
+static int parse_ta_v1_microcode(struct psp_context *psp)
{
+ const struct ta_firmware_header_v1_0 *ta_hdr;
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
- const struct ta_firmware_header_v2_0 *ta_hdr;
- int err = 0;
- int ta_index = 0;
- if (!chip_name) {
- dev_err(adev->dev, "invalid chip name for ta microcode\n");
+ ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data;
+
+ if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
return -EINVAL;
- }
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err)
- goto out;
+ adev->psp.xgmi_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->xgmi.fw_version);
+ adev->psp.xgmi_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->xgmi.size_bytes);
+ adev->psp.xgmi_context.context.bin_desc.start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.ras_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->ras.fw_version);
+ adev->psp.ras_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->ras.size_bytes);
+ adev->psp.ras_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->ras.offset_bytes);
+
+ adev->psp.hdcp_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->hdcp.fw_version);
+ adev->psp.hdcp_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->hdcp.size_bytes);
+ adev->psp.hdcp_context.context.bin_desc.start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.dtm_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->dtm.fw_version);
+ adev->psp.dtm_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->dtm.size_bytes);
+ adev->psp.dtm_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+ adev->psp.securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->securedisplay.fw_version);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+ adev->psp.securedisplay_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out;
+ return 0;
+}
+
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+ const struct ta_firmware_header_v2_0 *ta_hdr;
+ struct amdgpu_device *adev = psp->adev;
+ int err = 0;
+ int ta_index = 0;
ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
- if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) {
- dev_err(adev->dev, "unsupported TA header version\n");
- err = -EINVAL;
- goto out;
- }
+ if (le16_to_cpu(ta_hdr->header.header_version_major) != 2)
+ return -EINVAL;
- if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_TA_PACKAGING) {
+ if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
dev_err(adev->dev, "packed TA count exceeds maximum limit\n");
- err = -EINVAL;
- goto out;
+ return -EINVAL;
}
for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) {
@@ -2870,24 +3948,131 @@ int psp_init_ta_microcode(struct psp_context *psp,
&ta_hdr->ta_fw_bin[ta_index],
ta_hdr);
if (err)
- goto out;
+ return err;
}
return 0;
+}
+
+int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
+{
+ const struct common_firmware_header *hdr;
+ struct amdgpu_device *adev = psp->adev;
+ int err;
+
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta.bin", chip_name);
+ if (err)
+ return err;
+
+ hdr = (const struct common_firmware_header *)adev->psp.ta_fw->data;
+ switch (le16_to_cpu(hdr->header_version_major)) {
+ case 1:
+ err = parse_ta_v1_microcode(psp);
+ break;
+ case 2:
+ err = parse_ta_v2_microcode(psp);
+ break;
+ default:
+ dev_err(adev->dev, "unsupported TA header version\n");
+ err = -EINVAL;
+ }
+
+ if (err)
+ amdgpu_ucode_release(&adev->psp.ta_fw);
+
+ return err;
+}
+
+int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const struct psp_firmware_header_v1_0 *cap_hdr_v1_0;
+ struct amdgpu_firmware_info *info = NULL;
+ int err = 0;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ dev_err(adev->dev, "cap microcode should only be loaded under SRIOV\n");
+ return -EINVAL;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_cap.bin", chip_name);
+ if (err) {
+ if (err == -ENODEV) {
+ dev_warn(adev->dev, "cap microcode does not exist, skip\n");
+ err = 0;
+ } else {
+ dev_err(adev->dev, "fail to initialize cap microcode\n");
+ }
+ goto out;
+ }
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP];
+ info->ucode_id = AMDGPU_UCODE_ID_CAP;
+ info->fw = adev->psp.cap_fw;
+ cap_hdr_v1_0 = (const struct psp_firmware_header_v1_0 *)
+ adev->psp.cap_fw->data;
+ adev->firmware.fw_size += ALIGN(
+ le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes), PAGE_SIZE);
+ adev->psp.cap_fw_version = le32_to_cpu(cap_hdr_v1_0->header.ucode_version);
+ adev->psp.cap_feature_version = le32_to_cpu(cap_hdr_v1_0->sos.fw_version);
+ adev->psp.cap_ucode_size = le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes);
+
+ return 0;
+
out:
- dev_err(adev->dev, "fail to initialize ta microcode\n");
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
+ amdgpu_ucode_release(&adev->psp.cap_fw);
return err;
}
-static int psp_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
+int psp_config_sq_perfmon(struct psp_context *psp,
+ uint32_t xcp_id, bool core_override_enable,
+ bool reg_override_enable, bool perfmon_override_enable)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (xcp_id > MAX_XCP) {
+ dev_err(psp->adev->dev, "invalid xcp_id %d\n", xcp_id);
+ return -EINVAL;
+ }
+
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) {
+ dev_err(psp->adev->dev, "Unsupported MP0 version 0x%x for CONFIG_SQ_PERFMON command\n",
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0));
+ return -EINVAL;
+ }
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_CONFIG_SQ_PERFMON;
+ cmd->cmd.config_sq_perfmon.gfx_xcp_mask = BIT_MASK(xcp_id);
+ cmd->cmd.config_sq_perfmon.core_override = core_override_enable;
+ cmd->cmd.config_sq_perfmon.reg_override = reg_override_enable;
+ cmd->cmd.config_sq_perfmon.perfmon_override = perfmon_override_enable;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_warn(psp->adev->dev, "PSP failed to config sq: xcp%d core%d reg%d perfmon%d\n",
+ xcp_id, core_override_enable, reg_override_enable, perfmon_override_enable);
+
+ release_psp_cmd_buf(psp);
+ return ret;
+}
+
+static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
return 0;
}
-static int psp_set_powergating_state(void *handle,
+static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2899,11 +4084,13 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_ip_block *ip_block;
uint32_t fw_ver;
int ret;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
- DRM_INFO("PSP block is not ready yet.");
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
+ dev_info(adev->dev, "PSP block is not ready yet\n.");
return -EBUSY;
}
@@ -2912,11 +4099,11 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
mutex_unlock(&adev->psp.mutex);
if (ret) {
- DRM_ERROR("Failed to read USBC PD FW, err = %d", ret);
+ dev_err(adev->dev, "Failed to read USBC PD FW, err = %d\n", ret);
return ret;
}
- return snprintf(buf, PAGE_SIZE, "%x\n", fw_ver);
+ return sysfs_emit(buf, "%x\n", fw_ver);
}
static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
@@ -2926,99 +4113,362 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- void *cpu_addr;
- dma_addr_t dma_addr;
- int ret;
- char fw_name[100];
+ int ret, idx;
const struct firmware *usbc_pd_fw;
-
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
- DRM_INFO("PSP block is not ready yet.");
+ struct amdgpu_bo *fw_buf_bo = NULL;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_cpu_addr;
+ struct amdgpu_ip_block *ip_block;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
+ dev_err(adev->dev, "PSP block is not ready yet.");
return -EBUSY;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s", buf);
- ret = request_firmware(&usbc_pd_fw, fw_name, adev->dev);
+ if (!drm_dev_enter(ddev, &idx))
+ return -ENODEV;
+
+ ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s", buf);
if (ret)
goto fail;
- /* We need contiguous physical mem to place the FW for psp to access */
- cpu_addr = dma_alloc_coherent(adev->dev, usbc_pd_fw->size, &dma_addr, GFP_KERNEL);
-
- ret = dma_mapping_error(adev->dev, dma_addr);
+ /* LFB address which is aligned to 1MB boundary per PSP request */
+ ret = amdgpu_bo_create_kernel(adev, usbc_pd_fw->size, 0x100000,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &fw_buf_bo, &fw_pri_mc_addr,
+ &fw_pri_cpu_addr);
if (ret)
goto rel_buf;
- memcpy_toio(cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size);
-
- /*
- * x86 specific workaround.
- * Without it the buffer is invisible in PSP.
- *
- * TODO Remove once PSP starts snooping CPU cache
- */
-#ifdef CONFIG_X86
- clflush_cache_range(cpu_addr, (usbc_pd_fw->size & ~(L1_CACHE_BYTES - 1)));
-#endif
+ memcpy_toio(fw_pri_cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size);
mutex_lock(&adev->psp.mutex);
- ret = psp_load_usbc_pd_fw(&adev->psp, dma_addr);
+ ret = psp_load_usbc_pd_fw(&adev->psp, fw_pri_mc_addr);
mutex_unlock(&adev->psp.mutex);
-rel_buf:
- dma_free_coherent(adev->dev, usbc_pd_fw->size, cpu_addr, dma_addr);
- release_firmware(usbc_pd_fw);
+ amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
+rel_buf:
+ amdgpu_ucode_release(&usbc_pd_fw);
fail:
if (ret) {
- DRM_ERROR("Failed to load USBC PD FW, err = %d", ret);
- return ret;
+ dev_err(adev->dev, "Failed to load USBC PD FW, err = %d", ret);
+ count = ret;
}
+ drm_dev_exit(idx);
return count;
}
-static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR,
+void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size)
+{
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(psp->adev), &idx))
+ return;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, start_addr, bin_size);
+
+ drm_dev_exit(idx);
+}
+
+/**
+ * DOC: usbc_pd_fw
+ * Reading from this file will retrieve the USB-C PD firmware version. Writing to
+ * this file will trigger the update process.
+ */
+static DEVICE_ATTR(usbc_pd_fw, 0644,
psp_usbc_pd_fw_sysfs_read,
psp_usbc_pd_fw_sysfs_write);
+int is_psp_fw_valid(struct psp_bin_desc bin)
+{
+ return bin.size_bytes;
+}
+static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
+ char *buffer, loff_t pos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
-const struct amd_ip_funcs psp_ip_funcs = {
- .name = "psp",
- .early_init = psp_early_init,
- .late_init = NULL,
- .sw_init = psp_sw_init,
- .sw_fini = psp_sw_fini,
- .hw_init = psp_hw_init,
- .hw_fini = psp_hw_fini,
- .suspend = psp_suspend,
- .resume = psp_resume,
- .is_idle = NULL,
- .check_soft_reset = NULL,
- .wait_for_idle = NULL,
- .soft_reset = NULL,
- .set_clockgating_state = psp_set_clockgating_state,
- .set_powergating_state = psp_set_powergating_state,
+ adev->psp.vbflash_done = false;
+
+ /* Safeguard against memory drain */
+ if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) {
+ dev_err(adev->dev, "File size cannot exceed %u\n", AMD_VBIOS_FILE_MAX_SIZE_B);
+ kvfree(adev->psp.vbflash_tmp_buf);
+ adev->psp.vbflash_tmp_buf = NULL;
+ adev->psp.vbflash_image_size = 0;
+ return -ENOMEM;
+ }
+
+ /* TODO Just allocate max for now and optimize to realloc later if needed */
+ if (!adev->psp.vbflash_tmp_buf) {
+ adev->psp.vbflash_tmp_buf = kvmalloc(AMD_VBIOS_FILE_MAX_SIZE_B, GFP_KERNEL);
+ if (!adev->psp.vbflash_tmp_buf)
+ return -ENOMEM;
+ }
+
+ mutex_lock(&adev->psp.mutex);
+ memcpy(adev->psp.vbflash_tmp_buf + pos, buffer, count);
+ adev->psp.vbflash_image_size += count;
+ mutex_unlock(&adev->psp.mutex);
+
+ dev_dbg(adev->dev, "IFWI staged for update\n");
+
+ return count;
+}
+
+static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr, char *buffer,
+ loff_t pos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_bo *fw_buf_bo = NULL;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_cpu_addr;
+ int ret;
+
+ if (adev->psp.vbflash_image_size == 0)
+ return -EINVAL;
+
+ dev_dbg(adev->dev, "PSP IFWI flash process initiated\n");
+
+ ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &fw_buf_bo,
+ &fw_pri_mc_addr,
+ &fw_pri_cpu_addr);
+ if (ret)
+ goto rel_buf;
+
+ memcpy_toio(fw_pri_cpu_addr, adev->psp.vbflash_tmp_buf, adev->psp.vbflash_image_size);
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_update_spirom(&adev->psp, fw_pri_mc_addr);
+ mutex_unlock(&adev->psp.mutex);
+
+ amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
+
+rel_buf:
+ kvfree(adev->psp.vbflash_tmp_buf);
+ adev->psp.vbflash_tmp_buf = NULL;
+ adev->psp.vbflash_image_size = 0;
+
+ if (ret) {
+ dev_err(adev->dev, "Failed to load IFWI, err = %d\n", ret);
+ return ret;
+ }
+
+ dev_dbg(adev->dev, "PSP IFWI flash process done\n");
+ return 0;
+}
+
+/**
+ * DOC: psp_vbflash
+ * Writing to this file will stage an IFWI for update. Reading from this file
+ * will trigger the update process.
+ */
+static const struct bin_attribute psp_vbflash_bin_attr = {
+ .attr = {.name = "psp_vbflash", .mode = 0660},
+ .size = 0,
+ .write = amdgpu_psp_vbflash_write,
+ .read = amdgpu_psp_vbflash_read,
};
-static int psp_sysfs_init(struct amdgpu_device *adev)
+/**
+ * DOC: psp_vbflash_status
+ * The status of the flash process.
+ * 0: IFWI flash not complete.
+ * 1: IFWI flash complete.
+ */
+static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
- int ret = device_create_file(adev->dev, &dev_attr_usbc_pd_fw);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint32_t vbflash_status;
+
+ vbflash_status = psp_vbflash_status(&adev->psp);
+ if (!adev->psp.vbflash_done)
+ vbflash_status = 0;
+ else if (adev->psp.vbflash_done && !(vbflash_status & 0x80000000))
+ vbflash_status = 1;
+
+ return sysfs_emit(buf, "0x%x\n", vbflash_status);
+}
+static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
+static const struct bin_attribute *const bin_flash_attrs[] = {
+ &psp_vbflash_bin_attr,
+ NULL
+};
+
+static struct attribute *flash_attrs[] = {
+ &dev_attr_psp_vbflash_status.attr,
+ &dev_attr_usbc_pd_fw.attr,
+ NULL
+};
+
+static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (attr == &dev_attr_usbc_pd_fw.attr)
+ return adev->psp.sup_pd_fw_up ? 0660 : 0;
+
+ return adev->psp.sup_ifwi_up ? 0440 : 0;
+}
+
+static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj,
+ const struct bin_attribute *attr,
+ int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return adev->psp.sup_ifwi_up ? 0660 : 0;
+}
+
+const struct attribute_group amdgpu_flash_attr_group = {
+ .attrs = flash_attrs,
+ .bin_attrs = bin_flash_attrs,
+ .is_bin_visible = amdgpu_bin_flash_attr_is_visible,
+ .is_visible = amdgpu_flash_attr_is_visible,
+};
+
+#if defined(CONFIG_DEBUG_FS)
+static int psp_read_spirom_debugfs_open(struct inode *inode, struct file *filp)
+{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet;
+ int ret;
+
+ /* serialize the open() file calling */
+ if (!mutex_trylock(&adev->psp.mutex))
+ return -EBUSY;
+
+ /*
+ * make sure only one userpace process is alive for dumping so that
+ * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE * 2 is consumed.
+ * let's say the case where one process try opening the file while
+ * another one has proceeded to read or release. In this way, eliminate
+ * the use of mutex for read() or release() callback as well.
+ */
+ if (adev->psp.spirom_dump_trip) {
+ mutex_unlock(&adev->psp.mutex);
+ return -EBUSY;
+ }
+
+ bo_triplet = kzalloc(sizeof(struct spirom_bo), GFP_KERNEL);
+ if (!bo_triplet) {
+ mutex_unlock(&adev->psp.mutex);
+ return -ENOMEM;
+ }
+
+ ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo_triplet->bo,
+ &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+ if (ret)
+ goto rel_trip;
+
+ ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr);
if (ret)
- DRM_ERROR("Failed to create USBC PD FW control file!");
+ goto rel_bo;
+ adev->psp.spirom_dump_trip = bo_triplet;
+ mutex_unlock(&adev->psp.mutex);
+ return 0;
+rel_bo:
+ amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+rel_trip:
+ kfree(bo_triplet);
+ mutex_unlock(&adev->psp.mutex);
+ dev_err(adev->dev, "Trying IFWI dump fails, err = %d\n", ret);
return ret;
}
-static void psp_sysfs_fini(struct amdgpu_device *adev)
+static ssize_t psp_read_spirom_debugfs_read(struct file *filp, char __user *buf, size_t size,
+ loff_t *pos)
{
- device_remove_file(adev->dev, &dev_attr_usbc_pd_fw);
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip;
+
+ if (!bo_triplet)
+ return -EINVAL;
+
+ return simple_read_from_buffer(buf,
+ size,
+ pos, bo_triplet->cpu_addr,
+ AMD_VBIOS_FILE_MAX_SIZE_B * 2);
}
-const struct amdgpu_ip_block_version psp_v3_1_ip_block =
+static int psp_read_spirom_debugfs_release(struct inode *inode, struct file *filp)
{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip;
+
+ if (bo_triplet) {
+ amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+ kfree(bo_triplet);
+ }
+
+ adev->psp.spirom_dump_trip = NULL;
+ return 0;
+}
+
+static const struct file_operations psp_dump_spirom_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .open = psp_read_spirom_debugfs_open,
+ .read = psp_read_spirom_debugfs_read,
+ .release = psp_read_spirom_debugfs_release,
+ .llseek = default_llseek,
+};
+#endif
+
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ debugfs_create_file_size("psp_spirom_dump", 0444, minor->debugfs_root,
+ adev, &psp_dump_spirom_debugfs_ops, AMD_VBIOS_FILE_MAX_SIZE_B * 2);
+#endif
+}
+
+const struct amd_ip_funcs psp_ip_funcs = {
+ .name = "psp",
+ .early_init = psp_early_init,
+ .sw_init = psp_sw_init,
+ .sw_fini = psp_sw_fini,
+ .hw_init = psp_hw_init,
+ .hw_fini = psp_hw_fini,
+ .suspend = psp_suspend,
+ .resume = psp_resume,
+ .set_clockgating_state = psp_set_clockgating_state,
+ .set_powergating_state = psp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version psp_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 3,
.minor = 1,
@@ -3026,8 +4476,7 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block =
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 10,
.minor = 0,
@@ -3035,8 +4484,7 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v11_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v11_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 11,
.minor = 0,
@@ -3044,11 +4492,42 @@ const struct amdgpu_ip_block_version psp_v11_0_ip_block =
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v12_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 11,
+ .minor = 0,
+ .rev = 8,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v12_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 12,
.minor = 0,
.rev = 0,
.funcs = &psp_ip_funcs,
};
+
+const struct amdgpu_ip_block_version psp_v13_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 13,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 13,
+ .minor = 0,
+ .rev = 4,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v14_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 14,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cb50ba445f8c..237b624aa51c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -34,32 +34,79 @@
#define PSP_FENCE_BUFFER_SIZE 0x1000
#define PSP_CMD_BUFFER_SIZE 0x1000
-#define PSP_XGMI_SHARED_MEM_SIZE 0x4000
-#define PSP_RAS_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000
-#define PSP_TMR_SIZE 0x400000
-#define PSP_HDCP_SHARED_MEM_SIZE 0x4000
-#define PSP_DTM_SHARED_MEM_SIZE 0x4000
-#define PSP_RAP_SHARED_MEM_SIZE 0x4000
-#define PSP_SECUREDISPLAY_SHARED_MEM_SIZE 0x4000
-#define PSP_SHARED_MEM_SIZE 0x4000
+#define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000)
+#define PSP_TMR_ALIGNMENT 0x100000
#define PSP_FW_NAME_LEN 0x24
+/* VBIOS gfl defines */
+#define MBOX_READY_MASK 0x80000000
+#define MBOX_STATUS_MASK 0x0000FFFF
+#define MBOX_COMMAND_MASK 0x00FF0000
+#define MBOX_READY_FLAG 0x80000000
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO 0xf
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10
+#define C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11
+
+/* Command register bit 31 set to indicate readiness */
+#define MBOX_TOS_READY_FLAG (GFX_FLAG_RESPONSE)
+#define MBOX_TOS_READY_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK)
+
+/* Values to check for a successful GFX_CMD response wait. Check against
+ * both status bits and response state - helps to detect a command failure
+ * or other unexpected cases like a device drop reading all 0xFFs
+ */
+#define MBOX_TOS_RESP_FLAG (GFX_FLAG_RESPONSE)
+#define MBOX_TOS_RESP_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK)
+
+extern const struct attribute_group amdgpu_flash_attr_group;
+
+enum psp_shared_mem_size {
+ PSP_ASD_SHARED_MEM_SIZE = 0x0,
+ PSP_XGMI_SHARED_MEM_SIZE = 0x4000,
+ PSP_RAS_SHARED_MEM_SIZE = 0x4000,
+ PSP_HDCP_SHARED_MEM_SIZE = 0x4000,
+ PSP_DTM_SHARED_MEM_SIZE = 0x4000,
+ PSP_RAP_SHARED_MEM_SIZE = 0x4000,
+ PSP_SECUREDISPLAY_SHARED_MEM_SIZE = 0x4000,
+};
+
+enum ta_type_id {
+ TA_TYPE_XGMI = 1,
+ TA_TYPE_RAS,
+ TA_TYPE_HDCP,
+ TA_TYPE_DTM,
+ TA_TYPE_RAP,
+ TA_TYPE_SECUREDISPLAY,
+
+ TA_TYPE_MAX_INDEX,
+};
+
struct psp_context;
struct psp_xgmi_node_info;
struct psp_xgmi_topology_info;
+struct psp_bin_desc;
enum psp_bootloader_cmd {
PSP_BL__LOAD_SYSDRV = 0x10000,
PSP_BL__LOAD_SOSDRV = 0x20000,
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
+ PSP_BL__LOAD_SOCDRV = 0xB0000,
+ PSP_BL__LOAD_DBGDRV = 0xC0000,
+ PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV,
+ PSP_BL__LOAD_INTFDRV = 0xD0000,
+ PSP_BL__LOAD_RASDRV = 0xE0000,
+ PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
+ PSP_BL__LOAD_SPDMDRV = 0x20000000,
};
-enum psp_ring_type
-{
+enum psp_ring_type {
PSP_RING_TYPE__INVALID = 0,
/*
* These values map to the way the PSP kernel identifies the
@@ -69,13 +116,13 @@ enum psp_ring_type
PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */
};
-struct psp_ring
-{
+struct psp_ring {
enum psp_ring_type ring_type;
struct psp_gfx_rb_frame *ring_mem;
uint64_t ring_mem_mc_addr;
void *ring_mem_handle;
uint32_t ring_size;
+ uint32_t ring_wptr;
};
/* More registers may will be supported */
@@ -83,17 +130,26 @@ enum psp_reg_prog_id {
PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
+ PSP_REG_MMHUB_L1_TLB_CNTL = 25,
PSP_REG_LAST
};
-struct psp_funcs
-{
+#define PSP_WAITREG_CHANGED BIT(0) /* check if the value has changed */
+#define PSP_WAITREG_NOVERBOSE BIT(1) /* No error verbose */
+
+struct psp_funcs {
int (*init_microcode)(struct psp_context *psp);
+ int (*wait_for_bootloader)(struct psp_context *psp);
int (*bootloader_load_kdb)(struct psp_context *psp);
int (*bootloader_load_spl)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
+ int (*bootloader_load_soc_drv)(struct psp_context *psp);
+ int (*bootloader_load_intf_drv)(struct psp_context *psp);
+ int (*bootloader_load_dbg_drv)(struct psp_context *psp);
+ int (*bootloader_load_ras_drv)(struct psp_context *psp);
+ int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp);
+ int (*bootloader_load_spdm_drv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
- int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
int (*ring_stop)(struct psp_context *psp,
@@ -105,8 +161,23 @@ struct psp_funcs
int (*mem_training)(struct psp_context *psp, uint32_t ops);
uint32_t (*ring_get_wptr)(struct psp_context *psp);
void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
- int (*load_usbc_pd_fw)(struct psp_context *psp, dma_addr_t dma_addr);
+ int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver);
+ int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*dump_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*vbflash_stat)(struct psp_context *psp);
+ int (*fatal_error_recovery_quirk)(struct psp_context *psp);
+ bool (*get_ras_capability)(struct psp_context *psp);
+ bool (*is_aux_sos_load_required)(struct psp_context *psp);
+ bool (*is_reload_needed)(struct psp_context *psp);
+ int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
+};
+
+struct ta_funcs {
+ int (*fn_ta_initialize)(struct psp_context *psp);
+ int (*fn_ta_invoke)(struct psp_context *psp, uint32_t ta_cmd_id);
+ int (*fn_ta_terminate)(struct psp_context *psp);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
@@ -115,6 +186,8 @@ struct psp_xgmi_node_info {
uint8_t num_hops;
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
+ uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
};
struct psp_xgmi_topology_info {
@@ -122,71 +195,53 @@ struct psp_xgmi_topology_info {
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
-struct psp_asd_context {
- bool asd_initialized;
- uint32_t session_id;
+struct psp_bin_desc {
+ uint32_t fw_version;
+ uint32_t feature_version;
+ uint32_t size_bytes;
+ uint8_t *start_addr;
};
-struct psp_xgmi_context {
- uint8_t initialized;
- uint32_t session_id;
- struct amdgpu_bo *xgmi_shared_bo;
- uint64_t xgmi_shared_mc_addr;
- void *xgmi_shared_buf;
- struct psp_xgmi_topology_info top_info;
+struct ta_mem_context {
+ struct amdgpu_bo *shared_bo;
+ uint64_t shared_mc_addr;
+ void *shared_buf;
+ enum psp_shared_mem_size shared_mem_size;
};
-struct psp_ras_context {
- /*ras fw*/
- bool ras_initialized;
+struct ta_context {
+ bool initialized;
uint32_t session_id;
- struct amdgpu_bo *ras_shared_bo;
- uint64_t ras_shared_mc_addr;
- void *ras_shared_buf;
- struct amdgpu_ras *ras;
+ uint32_t resp_status;
+ struct ta_mem_context mem_context;
+ struct psp_bin_desc bin_desc;
+ enum psp_gfx_cmd_id ta_load_type;
+ enum ta_type_id ta_type;
};
-struct psp_hdcp_context {
- bool hdcp_initialized;
- uint32_t session_id;
- struct amdgpu_bo *hdcp_shared_bo;
- uint64_t hdcp_shared_mc_addr;
- void *hdcp_shared_buf;
- struct mutex mutex;
-};
-
-struct psp_dtm_context {
- bool dtm_initialized;
- uint32_t session_id;
- struct amdgpu_bo *dtm_shared_bo;
- uint64_t dtm_shared_mc_addr;
- void *dtm_shared_buf;
- struct mutex mutex;
+struct ta_cp_context {
+ struct ta_context context;
+ struct mutex mutex;
};
-struct psp_rap_context {
- bool rap_initialized;
- uint32_t session_id;
- struct amdgpu_bo *rap_shared_bo;
- uint64_t rap_shared_mc_addr;
- void *rap_shared_buf;
- struct mutex mutex;
+struct psp_xgmi_context {
+ struct ta_context context;
+ struct psp_xgmi_topology_info top_info;
+ bool supports_extended_data;
+ uint8_t xgmi_ta_caps;
};
-struct psp_securedisplay_context {
- bool securedisplay_initialized;
- uint32_t session_id;
- struct amdgpu_bo *securedisplay_shared_bo;
- uint64_t securedisplay_shared_mc_addr;
- void *securedisplay_shared_buf;
- struct mutex mutex;
+struct psp_ras_context {
+ struct ta_context context;
+ struct amdgpu_ras *ras;
+ struct mutex mutex;
};
#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
#define GDDR6_MEM_TRAINING_OFFSET 0x8000
/*Define the VRAM size that will be encroached by BIST training.*/
-#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
+#define BIST_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
enum psp_memory_training_init_flag {
PSP_MEM_TRAIN_NOT_SUPPORT = 0x0,
@@ -224,15 +279,91 @@ struct psp_memory_training_context {
enum psp_memory_training_init_flag init;
u32 training_cnt;
+ bool enable_mem_training;
+};
+
+/** PSP runtime DB **/
+#define PSP_RUNTIME_DB_SIZE_IN_BYTES 0x10000
+#define PSP_RUNTIME_DB_OFFSET 0x100000
+#define PSP_RUNTIME_DB_COOKIE_ID 0x0ed5
+#define PSP_RUNTIME_DB_VER_1 0x0100
+#define PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT 0x40
+
+enum psp_runtime_entry_type {
+ PSP_RUNTIME_ENTRY_TYPE_INVALID = 0x0,
+ PSP_RUNTIME_ENTRY_TYPE_TEST = 0x1,
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_COMMON = 0x2, /* Common mGPU runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_WAFL = 0x3, /* WAFL runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_XGMI = 0x4, /* XGMI runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG = 0x5, /* Boot Config runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS = 0x6, /* SCPM validation data */
+};
+
+/* PSP runtime DB header */
+struct psp_runtime_data_header {
+ /* determine the existence of runtime db */
+ uint16_t cookie;
+ /* version of runtime db */
+ uint16_t version;
+};
+
+/* PSP runtime DB entry */
+struct psp_runtime_entry {
+ /* type of runtime db entry */
+ uint32_t entry_type;
+ /* offset of entry in bytes */
+ uint16_t offset;
+ /* size of entry in bytes */
+ uint16_t size;
+};
+
+/* PSP runtime DB directory */
+struct psp_runtime_data_directory {
+ /* number of valid entries */
+ uint16_t entry_count;
+ /* db entries*/
+ struct psp_runtime_entry entry_list[PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT];
+};
+
+/* PSP runtime DB boot config feature bitmask */
+enum psp_runtime_boot_cfg_feature {
+ BOOT_CFG_FEATURE_GECC = 0x1,
+ BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING = 0x2,
+};
+
+/* PSP run time DB SCPM authentication defines */
+enum psp_runtime_scpm_authentication {
+ SCPM_DISABLE = 0x0,
+ SCPM_ENABLE = 0x1,
+ SCPM_ENABLE_WITH_SCPM_ERR = 0x2,
};
-struct psp_context
-{
- struct amdgpu_device *adev;
- struct psp_ring km_ring;
+/* PSP runtime DB boot config entry */
+struct psp_runtime_boot_cfg_entry {
+ uint32_t boot_cfg_bitmask;
+ uint32_t reserved;
+};
+
+/* PSP runtime DB SCPM entry */
+struct psp_runtime_scpm_entry {
+ enum psp_runtime_scpm_authentication scpm_status;
+};
+
+#if defined(CONFIG_DEBUG_FS)
+struct spirom_bo {
+ struct amdgpu_bo *bo;
+ uint64_t mc_addr;
+ void *cpu_addr;
+};
+#endif
+
+struct psp_context {
+ struct amdgpu_device *adev;
+ struct psp_ring km_ring;
struct psp_gfx_cmd_resp *cmd;
const struct psp_funcs *funcs;
+ const struct ta_funcs *ta_funcs;
/* firmware buffer */
struct amdgpu_bo *fw_pri_bo;
@@ -241,18 +372,18 @@ struct psp_context
/* sos firmware */
const struct firmware *sos_fw;
- uint32_t sos_fw_version;
- uint32_t sos_feature_version;
- uint32_t sys_bin_size;
- uint32_t sos_bin_size;
- uint32_t toc_bin_size;
- uint32_t kdb_bin_size;
- uint32_t spl_bin_size;
- uint8_t *sys_start_addr;
- uint8_t *sos_start_addr;
- uint8_t *toc_start_addr;
- uint8_t *kdb_start_addr;
- uint8_t *spl_start_addr;
+ struct psp_bin_desc sys;
+ struct psp_bin_desc sos;
+ struct psp_bin_desc toc;
+ struct psp_bin_desc kdb;
+ struct psp_bin_desc spl;
+ struct psp_bin_desc rl;
+ struct psp_bin_desc soc_drv;
+ struct psp_bin_desc intf_drv;
+ struct psp_bin_desc dbg_drv;
+ struct psp_bin_desc ras_drv;
+ struct psp_bin_desc ipkeymgr_drv;
+ struct psp_bin_desc spdm_drv;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@@ -260,15 +391,12 @@ struct psp_context
/* asd firmware */
const struct firmware *asd_fw;
- uint32_t asd_fw_version;
- uint32_t asd_feature_version;
- uint32_t asd_ucode_size;
- uint8_t *asd_start_addr;
/* toc firmware */
const struct firmware *toc_fw;
- uint32_t toc_fw_version;
- uint32_t toc_feature_version;
+
+ /* cap firmware */
+ const struct firmware *cap_fw;
/* fence buffer */
struct amdgpu_bo *fence_buf_bo;
@@ -284,44 +412,41 @@ struct psp_context
atomic_t fence_value;
/* flag to mark whether gfx fw autoload is supported or not */
bool autoload_supported;
+ /* flag to mark whether psp use runtime TMR or boottime TMR */
+ bool boot_time_tmr;
/* flag to mark whether df cstate management centralized to PMFW */
bool pmfw_centralized_cstate_management;
/* xgmi ta firmware and buffer */
const struct firmware *ta_fw;
uint32_t ta_fw_version;
- uint32_t ta_xgmi_ucode_version;
- uint32_t ta_xgmi_ucode_size;
- uint8_t *ta_xgmi_start_addr;
- uint32_t ta_ras_ucode_version;
- uint32_t ta_ras_ucode_size;
- uint8_t *ta_ras_start_addr;
-
- uint32_t ta_hdcp_ucode_version;
- uint32_t ta_hdcp_ucode_size;
- uint8_t *ta_hdcp_start_addr;
-
- uint32_t ta_dtm_ucode_version;
- uint32_t ta_dtm_ucode_size;
- uint8_t *ta_dtm_start_addr;
-
- uint32_t ta_rap_ucode_version;
- uint32_t ta_rap_ucode_size;
- uint8_t *ta_rap_start_addr;
-
- uint32_t ta_securedisplay_ucode_version;
- uint32_t ta_securedisplay_ucode_size;
- uint8_t *ta_securedisplay_start_addr;
-
- struct psp_asd_context asd_context;
+
+ uint32_t cap_fw_version;
+ uint32_t cap_feature_version;
+ uint32_t cap_ucode_size;
+
+ struct ta_context asd_context;
struct psp_xgmi_context xgmi_context;
- struct psp_ras_context ras;
- struct psp_hdcp_context hdcp_context;
- struct psp_dtm_context dtm_context;
- struct psp_rap_context rap_context;
- struct psp_securedisplay_context securedisplay_context;
+ struct psp_ras_context ras_context;
+ struct ta_cp_context hdcp_context;
+ struct ta_cp_context dtm_context;
+ struct ta_cp_context rap_context;
+ struct ta_cp_context securedisplay_context;
struct mutex mutex;
struct psp_memory_training_context mem_train_ctx;
+
+ uint32_t boot_cfg_bitmask;
+
+ /* firmware upgrades supported */
+ bool sup_pd_fw_up;
+ bool sup_ifwi_up;
+
+ char *vbflash_tmp_buf;
+ size_t vbflash_image_size;
+ bool vbflash_done;
+#if defined(CONFIG_DEBUG_FS)
+ struct spirom_bo *spirom_dump_trip;
+#endif
};
struct amdgpu_psp_funcs {
@@ -330,7 +455,6 @@ struct amdgpu_psp_funcs {
};
-#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
@@ -342,6 +466,21 @@ struct amdgpu_psp_funcs {
((psp)->funcs->bootloader_load_spl ? (psp)->funcs->bootloader_load_spl((psp)) : 0)
#define psp_bootloader_load_sysdrv(psp) \
((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
+#define psp_bootloader_load_soc_drv(psp) \
+ ((psp)->funcs->bootloader_load_soc_drv ? (psp)->funcs->bootloader_load_soc_drv((psp)) : 0)
+#define psp_bootloader_load_intf_drv(psp) \
+ ((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0)
+#define psp_bootloader_load_dbg_drv(psp) \
+ ((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0)
+#define psp_bootloader_load_ras_drv(psp) \
+ ((psp)->funcs->bootloader_load_ras_drv ? \
+ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0)
+#define psp_bootloader_load_ipkeymgr_drv(psp) \
+ ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \
+ (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0)
+#define psp_bootloader_load_spdm_drv(psp) \
+ ((psp)->funcs->bootloader_load_spdm_drv ? \
+ (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
@@ -354,53 +493,97 @@ struct amdgpu_psp_funcs {
#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp))
#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value))
-#define psp_load_usbc_pd_fw(psp, dma_addr) \
+#define psp_load_usbc_pd_fw(psp, fw_pri_mc_addr) \
((psp)->funcs->load_usbc_pd_fw ? \
- (psp)->funcs->load_usbc_pd_fw((psp), (dma_addr)) : -EINVAL)
+ (psp)->funcs->load_usbc_pd_fw((psp), (fw_pri_mc_addr)) : -EINVAL)
#define psp_read_usbc_pd_fw(psp, fw_ver) \
((psp)->funcs->read_usbc_pd_fw ? \
(psp)->funcs->read_usbc_pd_fw((psp), fw_ver) : -EINVAL)
+#define psp_update_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->update_spirom ? \
+ (psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
+#define psp_dump_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->dump_spirom ? \
+ (psp)->funcs->dump_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
+#define psp_vbflash_status(psp) \
+ ((psp)->funcs->vbflash_stat ? \
+ (psp)->funcs->vbflash_stat((psp)) : -EINVAL)
+
+#define psp_fatal_error_recovery_quirk(psp) \
+ ((psp)->funcs->fatal_error_recovery_quirk ? \
+ (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0)
+
+#define psp_is_aux_sos_load_required(psp) \
+ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0)
+
+#define psp_reg_program_no_ring(psp, val, id) \
+ ((psp)->funcs->reg_program_no_ring ? \
+ (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL)
+
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
-extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
- uint32_t field_val, uint32_t mask, bool check_changed);
-
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block;
extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v13_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block;
+extern const struct amdgpu_ip_block_version psp_v14_0_ip_block;
+
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t flags);
+extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t msec_timeout);
+
+int psp_execute_ip_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode);
int psp_gpu_reset(struct amdgpu_device *adev);
-int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
- uint64_t cmd_gpu_addr, int cmd_size);
-int psp_xgmi_initialize(struct psp_context *psp);
+int psp_ta_init_shared_buf(struct psp_context *psp,
+ struct ta_mem_context *mem_ctx);
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx);
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context);
+int psp_ta_load(struct psp_context *psp, struct ta_context *context);
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context);
+
+int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta);
int psp_xgmi_terminate(struct psp_context *psp);
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id);
int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id);
int psp_xgmi_get_topology_info(struct psp_context *psp,
int number_devices,
- struct psp_xgmi_topology_info *topology);
+ struct psp_xgmi_topology_info *topology,
+ bool get_extended_data);
int psp_xgmi_set_topology_info(struct psp_context *psp,
int number_devices,
struct psp_xgmi_topology_info *topology);
-
+int psp_ras_initialize(struct psp_context *psp);
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
int psp_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info);
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
+int psp_ras_terminate(struct psp_context *psp);
+int psp_ras_query_address(struct psp_context *psp,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out);
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
-int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_status *status);
int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_rlc_autoload_start(struct psp_context *psp);
-extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
uint32_t value);
int psp_ring_cmd_submit(struct psp_context *psp,
@@ -415,6 +598,29 @@ int psp_init_sos_microcode(struct psp_context *psp,
const char *chip_name);
int psp_init_ta_microcode(struct psp_context *psp,
const char *chip_name);
+int psp_init_cap_microcode(struct psp_context *psp,
+ const char *chip_name);
int psp_get_fw_attestation_records_addr(struct psp_context *psp,
uint64_t *output_ptr);
+int psp_update_fw_reservation(struct psp_context *psp);
+int psp_load_fw_list(struct psp_context *psp,
+ struct amdgpu_firmware_info **ucode_list, int ucode_count);
+void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
+
+int psp_spatial_partition(struct psp_context *psp, int mode);
+int psp_memory_partition(struct psp_context *psp, int mode);
+
+int is_psp_fw_valid(struct psp_bin_desc bin);
+
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+bool amdgpu_psp_get_ras_capability(struct psp_context *psp);
+
+int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev);
+int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev);
+
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
new file mode 100644
index 000000000000..6e8aad91bcd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_psp_ta.h"
+
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+
+static uint32_t get_bin_version(const uint8_t *bin)
+{
+ const struct common_firmware_header *hdr =
+ (const struct common_firmware_header *)bin;
+
+ return hdr->ucode_version;
+}
+
+static int prep_ta_mem_context(struct ta_mem_context *mem_context,
+ uint8_t *shared_buf,
+ uint32_t shared_buf_len)
+{
+ if (mem_context->shared_mem_size < shared_buf_len)
+ return -EINVAL;
+ memset(mem_context->shared_buf, 0, mem_context->shared_mem_size);
+ memcpy((void *)mem_context->shared_buf, shared_buf, shared_buf_len);
+
+ return 0;
+}
+
+static bool is_ta_type_valid(enum ta_type_id ta_type)
+{
+ switch (ta_type) {
+ case TA_TYPE_RAS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct ta_funcs ras_ta_funcs = {
+ .fn_ta_initialize = psp_ras_initialize,
+ .fn_ta_invoke = psp_ras_invoke,
+ .fn_ta_terminate = psp_ras_terminate
+};
+
+static void set_ta_context_funcs(struct psp_context *psp,
+ enum ta_type_id ta_type,
+ struct ta_context **pcontext)
+{
+ switch (ta_type) {
+ case TA_TYPE_RAS:
+ *pcontext = &psp->ras_context.context;
+ psp->ta_funcs = &ras_ta_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static const struct file_operations ta_load_debugfs_fops = {
+ .write = ta_if_load_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+static const struct file_operations ta_unload_debugfs_fops = {
+ .write = ta_if_unload_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+static const struct file_operations ta_invoke_debugfs_fops = {
+ .write = ta_if_invoke_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+/*
+ * DOC: AMDGPU TA debugfs interfaces
+ *
+ * Three debugfs interfaces can be opened by a program to
+ * load/invoke/unload TA,
+ *
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_load
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_invoke
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_unload
+ *
+ * How to use the interfaces in a program?
+ *
+ * A program needs to provide transmit buffer to the interfaces
+ * and will receive buffer from the interfaces below,
+ *
+ * - For TA load debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA bin length (4bytes)
+ * - TA bin
+ * Receive buffer:
+ * - TA ID (4bytes)
+ *
+ * - For TA invoke debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA ID (4bytes)
+ * - TA CMD ID (4bytes)
+ * - TA shard buf length
+ * (4bytes, value not beyond TA shared memory size)
+ * - TA shared buf
+ * Receive buffer:
+ * - TA shared buf
+ *
+ * - For TA unload debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA ID (4bytes)
+ */
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_bin_len = 0;
+ uint8_t *ta_bin = NULL;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret || (!is_ta_type_valid(ta_type)))
+ return -EFAULT;
+
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+
+ if (ta_bin_len > PSP_1_MEG)
+ return -EINVAL;
+
+ copy_pos += sizeof(uint32_t);
+
+ ta_bin = memdup_user(&buf[copy_pos], ta_bin_len);
+ if (IS_ERR(ta_bin))
+ return PTR_ERR(ta_bin);
+
+ /* Set TA context and functions */
+ set_ta_context_funcs(psp, ta_type, &context);
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+ dev_err(adev->dev, "Unsupported function to terminate TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_bin;
+ }
+
+ /*
+ * Allocate TA shared buf in case shared buf was freed
+ * due to loading TA failed before.
+ */
+ if (!context->mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &context->mem_context);
+ if (ret) {
+ ret = -ENOMEM;
+ goto err_free_bin;
+ }
+ }
+
+ ret = psp_fn_ta_terminate(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev,
+ "Failed to unload embedded TA (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ goto err_free_ta_shared_buf;
+ }
+
+ /* Prepare TA context for TA initialization */
+ context->ta_type = ta_type;
+ context->bin_desc.fw_version = get_bin_version(ta_bin);
+ context->bin_desc.size_bytes = ta_bin_len;
+ context->bin_desc.start_addr = ta_bin;
+
+ if (!psp->ta_funcs->fn_ta_initialize) {
+ dev_err(adev->dev, "Unsupported function to initialize TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_ta_shared_buf;
+ }
+
+ ret = psp_fn_ta_initialize(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to load TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ goto err_free_ta_shared_buf;
+ }
+
+ if (copy_to_user((char *)buf, (void *)&context->session_id, sizeof(uint32_t)))
+ ret = -EFAULT;
+
+err_free_ta_shared_buf:
+ /* Only free TA shared buf when returns error code */
+ if (ret && context->mem_context.shared_buf)
+ psp_ta_free_shared_buf(&context->mem_context);
+err_free_bin:
+ kfree(ta_bin);
+
+ return ret;
+}
+
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_id = 0;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret || (!is_ta_type_valid(ta_type)))
+ return -EFAULT;
+
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+
+ set_ta_context_funcs(psp, ta_type, &context);
+ context->session_id = ta_id;
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+ dev_err(adev->dev, "Unsupported function to terminate TA\n");
+ return -EOPNOTSUPP;
+ }
+
+ ret = psp_fn_ta_terminate(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to unload TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ }
+
+ if (context->mem_context.shared_buf)
+ psp_ta_free_shared_buf(&context->mem_context);
+
+ return ret;
+}
+
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_id = 0;
+ uint32_t cmd_id = 0;
+ uint32_t shared_buf_len = 0;
+ uint8_t *shared_buf = NULL;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ shared_buf = memdup_user(&buf[copy_pos], shared_buf_len);
+ if (IS_ERR(shared_buf))
+ return PTR_ERR(shared_buf);
+
+ set_ta_context_funcs(psp, ta_type, &context);
+
+ if (!context || !context->initialized) {
+ dev_err(adev->dev, "TA is not initialized\n");
+ ret = -EINVAL;
+ goto err_free_shared_buf;
+ }
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) {
+ dev_err(adev->dev, "Unsupported function to invoke TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_shared_buf;
+ }
+
+ context->session_id = ta_id;
+
+ mutex_lock(&psp->ras_context.mutex);
+ ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
+ if (ret)
+ goto err_free_shared_buf;
+
+ ret = psp_fn_ta_invoke(psp, cmd_id);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to invoke TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret) {
+ ret = -EINVAL;
+ goto err_free_shared_buf;
+ }
+ }
+
+ if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
+ ret = -EFAULT;
+
+err_free_shared_buf:
+ mutex_unlock(&psp->ras_context.mutex);
+ kfree(shared_buf);
+
+ return ret;
+}
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ struct dentry *dir = debugfs_create_dir("ta_if", minor->debugfs_root);
+
+ debugfs_create_file("ta_load", 0200, dir, adev,
+ &ta_load_debugfs_fops);
+
+ debugfs_create_file("ta_unload", 0200, dir,
+ adev, &ta_unload_debugfs_fops);
+
+ debugfs_create_file("ta_invoke", 0200, dir,
+ adev, &ta_invoke_debugfs_fops);
+}
+
+#else
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
new file mode 100644
index 000000000000..14cd1c81c3e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PSP_TA_H__
+#define __AMDGPU_PSP_TA_H__
+
+/* Calling set_ta_context_funcs is required before using the following macros */
+#define psp_fn_ta_initialize(psp) ((psp)->ta_funcs->fn_ta_initialize((psp)))
+#define psp_fn_ta_invoke(psp, ta_cmd_id) ((psp)->ta_funcs->fn_ta_invoke((psp), (ta_cmd_id)))
+#define psp_fn_ta_terminate(psp) ((psp)->ta_funcs->fn_ta_terminate((psp)))
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
index 8da5356c36f1..bacf888735db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
@@ -48,6 +48,7 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf,
struct ta_rap_cmd_output_data *rap_cmd_output;
struct drm_device *dev = adev_to_drm(adev);
uint32_t op;
+ enum ta_rap_status status;
int ret;
if (*pos || size != 2)
@@ -70,13 +71,12 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf,
switch (op) {
case 2:
- ret = psp_rap_invoke(&adev->psp, op);
-
- if (ret == TA_RAP_STATUS__SUCCESS) {
+ ret = psp_rap_invoke(&adev->psp, op, &status);
+ if (!ret && status == TA_RAP_STATUS__SUCCESS) {
dev_info(adev->dev, "RAP L0 validate test success.\n");
} else {
rap_shared_mem = (struct ta_rap_shared_memory *)
- adev->psp.rap_context.rap_shared_buf;
+ adev->psp.rap_context.context.mem_context.shared_buf;
rap_cmd_output = &(rap_shared_mem->rap_out_message.output);
dev_info(adev->dev, "RAP test failed, the output is:\n");
@@ -97,10 +97,10 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf,
default:
dev_info(adev->dev, "Unsupported op id: %d, ", op);
dev_info(adev->dev, "Only support op 2(L0 validate test).\n");
+ break;
}
amdgpu_gfx_off_ctrl(adev, true);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return size;
@@ -115,13 +115,11 @@ static const struct file_operations amdgpu_rap_debugfs_ops = {
void amdgpu_rap_debugfs_init(struct amdgpu_device *adev)
{
-#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
- if (!adev->psp.rap_context.rap_initialized)
+ if (!adev->psp.rap_context.context.initialized)
return;
debugfs_create_file("rap_test", S_IWUSR, minor->debugfs_root,
adev, &amdgpu_rap_debugfs_ops);
-#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1fb2a91ad30a..2a6cf7963dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -27,13 +27,27 @@
#include <linux/uaccess.h>
#include <linux/reboot.h>
#include <linux/syscalls.h>
+#include <linux/pm_runtime.h>
+#include <linux/list_sort.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
-
+#include "nbio_v4_3.h"
+#include "nbif_v6_3_1.h"
+#include "nbio_v7_9.h"
+#include "atom.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ras_mgr.h"
+
+#ifdef CONFIG_X86_MCE_AMD
+#include <asm/mce.h>
+
+static bool notifier_registered;
+#endif
static const char *RAS_FS_NAME = "ras";
const char *ras_error_string[] = {
@@ -59,18 +73,64 @@ const char *ras_block_string[] = {
"mp0",
"mp1",
"fuse",
+ "mca",
+ "vcn",
+ "jpeg",
+ "ih",
+ "mpio",
+ "mmsch",
+};
+
+const char *ras_mca_block_string[] = {
+ "mca_mp0",
+ "mca_mp1",
+ "mca_mpio",
+ "mca_iohc",
};
+struct amdgpu_ras_block_list {
+ /* ras block link */
+ struct list_head node;
+
+ struct amdgpu_ras_block_object *ras_obj;
+};
+
+const char *get_ras_block_str(struct ras_common_if *ras_block)
+{
+ if (!ras_block)
+ return "NULL";
+
+ if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT ||
+ ras_block->block >= ARRAY_SIZE(ras_block_string))
+ return "OUT OF RANGE";
+
+ if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
+ return ras_mca_block_string[ras_block->sub_block_index];
+
+ return ras_block_string[ras_block->block];
+}
+
+#define ras_block_str(_BLOCK_) \
+ (((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] : "Out Of Range")
+
#define ras_err_str(i) (ras_error_string[ffs(i)])
-#define ras_block_str(i) (ras_block_string[i])
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
/* inject address is 52 bits */
#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52)
-/* typical ECC bad page rate(1 bad page per 100MB VRAM) */
-#define RAS_BAD_PAGE_RATE (100 * 1024 * 1024ULL)
+/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
+#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL)
+
+#define MAX_UMC_POISON_POLLING_TIME_ASYNC 10
+
+#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms
+
+#define MAX_FLUSH_RETIRE_DWORK_TIMES 100
+
+#define BYPASS_ALLOCATED_ADDRESS 0x0
+#define BYPASS_INITIALIZATION_ADDRESS 0x1
enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
@@ -80,11 +140,25 @@ enum amdgpu_ras_retire_page_reservation {
atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
-static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr);
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev);
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev);
+
+#ifdef CONFIG_X86_MCE_AMD
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+static void
+amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev);
+struct mce_notifier_adev_list {
+ struct amdgpu_device *devs[MAX_GPU_INSTANCE];
+ int num_gpu;
+};
+static struct mce_notifier_adev_list mce_adev_list;
+#endif
+
void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
{
if (adev && amdgpu_ras_get_context(adev))
@@ -99,6 +173,98 @@ static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
return false;
}
+static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address)
+{
+ struct ras_err_data err_data;
+ struct eeprom_table_record err_rec;
+ int ret;
+
+ ret = amdgpu_ras_check_bad_page(adev, address);
+ if (ret == -EINVAL) {
+ dev_warn(adev->dev,
+ "RAS WARN: input address 0x%llx is invalid.\n",
+ address);
+ return -EINVAL;
+ } else if (ret == 1) {
+ dev_warn(adev->dev,
+ "RAS WARN: 0x%llx has already been marked as bad page!\n",
+ address);
+ return 0;
+ }
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
+ err_data.err_addr = &err_rec;
+ amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
+
+ if (amdgpu_bad_page_threshold != 0) {
+ amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+ err_data.err_addr_cnt, false);
+ amdgpu_ras_save_bad_pages(adev, NULL);
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+
+ dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
+ dev_warn(adev->dev, "Clear EEPROM:\n");
+ dev_warn(adev->dev, " echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
+
+ return 0;
+}
+
+static int amdgpu_check_address_validity(struct amdgpu_device *adev,
+ uint64_t address, uint64_t flags)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_block_info blk_info;
+ uint64_t page_pfns[32] = {0};
+ int i, ret, count;
+ bool hit = false;
+
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
+ return 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
+ return -EPERM;
+ return hit ? -EACCES : 0;
+ }
+
+ if ((address >= adev->gmc.mc_vram_size) ||
+ (address >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EFAULT;
+
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ address, page_pfns, ARRAY_SIZE(page_pfns));
+ if (count <= 0)
+ return -EPERM;
+
+ for (i = 0; i < count; i++) {
+ memset(&blk_info, 0, sizeof(blk_info));
+ ret = amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr,
+ page_pfns[i] << AMDGPU_GPU_PAGE_SHIFT, &blk_info);
+ if (!ret) {
+ /* The input address that needs to be checked is allocated by
+ * current calling process, so it is necessary to exclude
+ * the calling process.
+ */
+ if ((flags == BYPASS_ALLOCATED_ADDRESS) &&
+ ((blk_info.task.pid != task_pid_nr(current)) ||
+ strncmp(blk_info.task.comm, current->comm, TASK_COMM_LEN)))
+ return -EACCES;
+ else if ((flags == BYPASS_INITIALIZATION_ADDRESS) &&
+ (blk_info.task.pid == con->init_task_pid) &&
+ !strncmp(blk_info.task.comm, con->init_task_comm, TASK_COMM_LEN))
+ return -EACCES;
+ }
+ }
+
+ return 0;
+}
+
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -109,9 +275,16 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
ssize_t s;
char val[128];
- if (amdgpu_ras_error_query(obj->adev, &info))
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
+ /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
+ if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+ }
+
s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
"ue", info.ue_count,
"ce", info.ce_count);
@@ -143,7 +316,7 @@ static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
*block_id = i;
- if (strcmp(name, ras_block_str(i)) == 0)
+ if (strcmp(name, ras_block_string[i]) == 0)
return 0;
}
return -EINVAL;
@@ -161,6 +334,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
int block_id;
uint32_t sub_block;
u64 address, value;
+ /* default value is 0 if the mask is not set by user */
+ u32 instance_mask = 0;
if (*pos)
return -EINVAL;
@@ -178,34 +353,65 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
op = 1;
else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
op = 2;
+ else if (strstr(str, "retire_page") != NULL)
+ op = 3;
+ else if (strstr(str, "check_address") != NULL)
+ op = 4;
else if (str[0] && str[1] && str[2] && str[3])
/* ascii string, but commands are not matched. */
return -EINVAL;
if (op != -1) {
+ if (op == 3) {
+ if (sscanf(str, "%*s 0x%llx", &address) != 1 &&
+ sscanf(str, "%*s %llu", &address) != 1)
+ return -EINVAL;
+
+ data->op = op;
+ data->inject.address = address;
+
+ return 0;
+ } else if (op == 4) {
+ if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) != 2 &&
+ sscanf(str, "%*s %llu %llu", &address, &value) != 2)
+ return -EINVAL;
+
+ data->op = op;
+ data->inject.address = address;
+ data->inject.value = value;
+ return 0;
+ }
+
if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
return -EINVAL;
data->head.block = block_id;
- /* only ue and ce errors are supported */
+ /* only ue, ce and poison errors are supported */
if (!memcmp("ue", err, 2))
data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
else if (!memcmp("ce", err, 2))
data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ else if (!memcmp("poison", err, 6))
+ data->head.type = AMDGPU_RAS_ERROR__POISON;
else
return -EINVAL;
data->op = op;
if (op == 2) {
- if (sscanf(str, "%*s %*s %*s %u %llu %llu",
- &sub_block, &address, &value) != 3)
- if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
- &sub_block, &address, &value) != 3)
- return -EINVAL;
+ if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu %u",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
+ &sub_block, &address, &value) != 3 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu",
+ &sub_block, &address, &value) != 3)
+ return -EINVAL;
data->head.sub_block_index = sub_block;
data->inject.address = address;
data->inject.value = value;
+ data->inject.instance_mask = instance_mask;
}
} else {
if (size < sizeof(*data))
@@ -218,10 +424,50 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0;
}
+static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
+ struct ras_debug_if *data)
+{
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t mask, inst_mask = data->inject.instance_mask;
+
+ /* no need to set instance mask if there is only one instance */
+ if (num_xcc <= 1 && inst_mask) {
+ data->inject.instance_mask = 0;
+ dev_dbg(adev->dev,
+ "RAS inject mask(0x%x) isn't supported and force it to 0.\n",
+ inst_mask);
+
+ return;
+ }
+
+ switch (data->head.block) {
+ case AMDGPU_RAS_BLOCK__GFX:
+ mask = GENMASK(num_xcc - 1, 0);
+ break;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ break;
+ case AMDGPU_RAS_BLOCK__VCN:
+ case AMDGPU_RAS_BLOCK__JPEG:
+ mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
+ break;
+ default:
+ mask = inst_mask;
+ break;
+ }
+
+ /* remove invalid bits in instance mask */
+ data->inject.instance_mask &= mask;
+ if (inst_mask != data->inject.instance_mask)
+ dev_dbg(adev->dev,
+ "Adjust RAS inject mask 0x%x to 0x%x\n",
+ inst_mask, data->inject.instance_mask);
+}
+
/**
* DOC: AMDGPU RAS debugfs control interface
*
- * It accepts struct ras_debug_if who has two members.
+ * The control interface accepts struct ras_debug_if which has two members.
*
* First member: ras_debug_if::head or ras_debug_if::inject.
*
@@ -233,7 +479,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* sub_block_index: some IPs have subcomponets. say, GFX, sDMA.
* name: the name of IP.
*
- * inject has two more members than head, they are address, value.
+ * inject has three more members than head, they are address, value and mask.
* As their names indicate, inject operation will write the
* value to the address.
*
@@ -246,55 +492,62 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
*
* How to use the interface?
*
- * Programs
+ * In a program
*
- * Copy the struct ras_debug_if in your codes and initialize it.
- * Write the struct to the control node.
+ * Copy the struct ras_debug_if in your code and initialize it.
+ * Write the struct to the control interface.
*
- * Shells
+ * From shell
*
* .. code-block:: bash
*
- * echo op block [error [sub_block address value]] > .../ras/ras_ctrl
+ * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ *
+ * Where N, is the card which you want to affect.
*
- * Parameters:
+ * "disable" requires only the block.
+ * "enable" requires the block and error type.
+ * "inject" requires the block, error type, address, and value.
*
- * op: disable, enable, inject
- * disable: only block is needed
- * enable: block and error are needed
- * inject: error, address, value are needed
- * block: umc, sdma, gfx, .........
+ * The block is one of: umc, sdma, gfx, etc.
* see ras_block_string[] for details
- * error: ue, ce
- * ue: multi_uncorrectable
- * ce: single_correctable
- * sub_block:
- * sub block index, pass 0 if there is no sub block
*
- * here are some examples for bash commands:
+ * The error type is one of: ue, ce and poison where,
+ * ue is multi-uncorrectable
+ * ce is single-correctable
+ * poison is poison
+ *
+ * The sub-block is a the sub-block index, pass 0 if there is no sub-block.
+ * The address and value are hexadecimal numbers, leading 0x is optional.
+ * The mask means instance mask, is optional, default value is 0x1.
+ *
+ * For instance,
*
* .. code-block:: bash
*
* echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
- * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
*
- * How to check the result?
+ * How to check the result of the operation?
*
- * For disable/enable, please check ras features at
+ * To check disable/enable, see "ras" features at,
* /sys/class/drm/card[0/1/2...]/device/ras/features
*
- * For inject, please check corresponding err count at
- * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ * To check inject, see the corresponding error count at,
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count
*
* .. note::
* Operations are only allowed on blocks which are supported.
- * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask
* to see which blocks support RAS on a particular asic.
*
*/
-static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
- size_t size, loff_t *pos)
+static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
+ const char __user *buf,
+ size_t size, loff_t *pos)
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
struct ras_debug_if data;
@@ -308,7 +561,18 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
if (ret)
- return -EINVAL;
+ return ret;
+
+ if (data.op == 3) {
+ ret = amdgpu_reserve_page_direct(adev, data.inject.address);
+ if (!ret)
+ return size;
+ else
+ return ret;
+ } else if (data.op == 4) {
+ ret = amdgpu_check_address_validity(adev, data.inject.address, data.inject.value);
+ return ret ? ret : size;
+ }
if (!amdgpu_ras_is_supported(adev, data.head.block))
return -EINVAL;
@@ -321,24 +585,21 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
- if ((data.inject.address >= adev->gmc.mc_vram_size) ||
- (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
- dev_warn(adev->dev, "RAS WARN: input address "
- "0x%llx is invalid.",
+ /* umc ce/ue error injection for a bad page is not allowed */
+ if (data.head.block == AMDGPU_RAS_BLOCK__UMC)
+ ret = amdgpu_ras_check_bad_page(adev, data.inject.address);
+ if (ret == -EINVAL) {
+ dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.",
data.inject.address);
- ret = -EINVAL;
break;
- }
-
- /* umc ce/ue error injection for a bad page is not allowed */
- if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
- amdgpu_ras_check_bad_page(adev, data.inject.address)) {
- dev_warn(adev->dev, "RAS WARN: 0x%llx has been marked "
- "as bad before error injection!\n",
+ } else if (ret == 1) {
+ dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n",
data.inject.address);
break;
}
+ amdgpu_ras_instance_mask_check(adev, &data);
+
/* data.inject.address is offset instead of absolute gpu address */
ret = amdgpu_ras_error_inject(adev, &data.inject);
break;
@@ -348,11 +609,13 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
}
if (ret)
- return -EINVAL;
+ return ret;
return size;
}
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev);
+
/**
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
@@ -369,21 +632,29 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
* will reset EEPROM table to 0 entries.
*
*/
-static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
- size_t size, loff_t *pos)
+static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
+ const char __user *buf,
+ size_t size, loff_t *pos)
{
struct amdgpu_device *adev =
(struct amdgpu_device *)file_inode(f)->i_private;
int ret;
+ if (amdgpu_uniras_enabled(adev)) {
+ ret = amdgpu_uniras_clear_badpages_info(adev);
+ return ret ? ret : size;
+ }
+
ret = amdgpu_ras_eeprom_reset_table(
- &(amdgpu_ras_get_context(adev)->eeprom_control));
+ &(amdgpu_ras_get_context(adev)->eeprom_control));
- if (ret == 1) {
+ if (!ret) {
+ /* Something was written to EEPROM.
+ */
amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS;
return size;
} else {
- return -EIO;
+ return ret;
}
}
@@ -431,15 +702,23 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
};
if (!amdgpu_ras_get_error_query_ready(obj->adev))
- return snprintf(buf, PAGE_SIZE,
- "Query currently inaccessible\n");
+ return sysfs_emit(buf, "Query currently inaccessible\n");
- if (amdgpu_ras_error_query(obj->adev, &info))
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
- return snprintf(buf, PAGE_SIZE, "%s: %lu\n%s: %lu\n",
- "ue", info.ue_count,
- "ce", info.ce_count);
+ if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+ }
+
+ if (info.head.block == AMDGPU_RAS_BLOCK__UMC)
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count, "de", info.de_count);
+ else
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count);
}
/* obj begin */
@@ -449,11 +728,13 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
static inline void put_obj(struct ras_manager *obj)
{
- if (obj && --obj->use == 0)
+ if (obj && (--obj->use == 0)) {
list_del(&obj->node);
- if (obj && obj->use < 0) {
- DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name);
+ amdgpu_ras_error_data_fini(&obj->err_data);
}
+
+ if (obj && (obj->use < 0))
+ DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
}
/* make one obj and return it. */
@@ -463,17 +744,27 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!con)
+ if (!adev->ras_enabled || !con)
return NULL;
if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
return NULL;
- obj = &con->objs[head->block];
+ if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+ if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+ return NULL;
+
+ obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+ } else
+ obj = &con->objs[head->block];
+
/* already exist. return obj? */
if (alive_obj(obj))
return NULL;
+ if (amdgpu_ras_error_data_init(&obj->err_data))
+ return NULL;
+
obj->head = *head;
obj->adev = adev;
list_add(&obj->node, &con->head);
@@ -490,26 +781,28 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
struct ras_manager *obj;
int i;
- if (!con)
+ if (!adev->ras_enabled || !con)
return NULL;
if (head) {
if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
return NULL;
- obj = &con->objs[head->block];
+ if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+ if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+ return NULL;
+
+ obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+ } else
+ obj = &con->objs[head->block];
- if (alive_obj(obj)) {
- WARN_ON(head->block != obj->head.block);
+ if (alive_obj(obj))
return obj;
- }
} else {
- for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
+ for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT + AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
obj = &con->objs[i];
- if (alive_obj(obj)) {
- WARN_ON(i != obj->head.block);
+ if (alive_obj(obj))
return obj;
- }
}
}
@@ -517,36 +810,11 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
}
/* obj end */
-static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
- const char* invoke_type,
- const char* block_name,
- enum ta_ras_status ret)
-{
- switch (ret) {
- case TA_RAS_STATUS__SUCCESS:
- return;
- case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
- dev_warn(adev->dev,
- "RAS WARN: %s %s currently unavailable\n",
- invoke_type,
- block_name);
- break;
- default:
- dev_err(adev->dev,
- "RAS ERROR: %s %s error failed ret 0x%X\n",
- invoke_type,
- block_name,
- ret);
- }
-}
-
/* feature ctl begin */
static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
- struct ras_common_if *head)
+ struct ras_common_if *head)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-
- return con->hw_supported & BIT(head->block);
+ return adev->ras_hw_enabled & BIT(head->block);
}
static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
@@ -575,8 +843,6 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
*/
if (!amdgpu_ras_is_feature_allowed(adev, head))
return 0;
- if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
- return 0;
if (enable) {
if (!obj) {
@@ -609,52 +875,50 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!con)
return -EINVAL;
- info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- if (!enable) {
- info->disable_features = (struct ta_ras_disable_features_input) {
- .block_id = amdgpu_ras_block_to_ta(head->block),
- .error_type = amdgpu_ras_error_to_ta(head->type),
- };
- } else {
- info->enable_features = (struct ta_ras_enable_features_input) {
- .block_id = amdgpu_ras_block_to_ta(head->block),
- .error_type = amdgpu_ras_error_to_ta(head->type),
- };
- }
+ /* For non-gfx ip, do not enable ras feature if it is not allowed */
+ /* For gfx ip, regardless of feature support status, */
+ /* Force issue enable or disable ras feature commands */
+ if (head->block != AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_ras_is_feature_allowed(adev, head))
+ return 0;
- /* Do not enable if it is not allowed. */
- WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
- /* Are we alerady in that state we are going to set? */
- if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) {
- ret = 0;
- goto out;
- }
+ /* Only enable gfx ras feature from host side */
+ if (head->block == AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_sriov_vf(adev) &&
+ !amdgpu_ras_intr_triggered()) {
+ info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ if (!enable) {
+ info->disable_features = (struct ta_ras_disable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ } else {
+ info->enable_features = (struct ta_ras_enable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ }
- if (!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) {
- amdgpu_ras_parse_status_code(adev,
- enable ? "enable":"disable",
- ras_block_str(head->block),
- (enum ta_ras_status)ret);
- if (ret == TA_RAS_STATUS__RESET_NEEDED)
- ret = -EAGAIN;
- else
- ret = -EINVAL;
-
- goto out;
+ dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
+ enable ? "enable":"disable",
+ get_ras_block_str(head),
+ amdgpu_ras_is_poison_mode_supported(adev), ret);
+ kfree(info);
+ return ret;
}
+
+ kfree(info);
}
/* setup the obj */
__amdgpu_ras_feature_enable(adev, head, enable);
- ret = 0;
-out:
- kfree(info);
- return ret;
+
+ return 0;
}
/* Only used in device probe stage and called only once. */
@@ -685,7 +949,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
if (!ret)
dev_info(adev->dev,
"RAS INFO: %s setup object\n",
- ras_block_str(head->block));
+ get_ras_block_str(head));
}
} else {
/* setup the object then issue a ras TA disable cmd.*/
@@ -693,7 +957,15 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
if (ret)
return ret;
+ /* gfx block ras disable cmd must send to ras-ta */
+ if (head->block == AMDGPU_RAS_BLOCK__GFX)
+ con->features |= BIT(head->block);
+
ret = amdgpu_ras_feature_enable(adev, head, 0);
+
+ /* clean gfx block ras features flag */
+ if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
+ con->features &= ~BIT(head->block);
}
} else
ret = amdgpu_ras_feature_enable(adev, head, enable);
@@ -727,18 +999,39 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
bool bypass)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- int ras_block_count = AMDGPU_RAS_BLOCK_COUNT;
int i;
- const enum amdgpu_ras_error_type default_ras_type =
- AMDGPU_RAS_ERROR__NONE;
+ const enum amdgpu_ras_error_type default_ras_type = AMDGPU_RAS_ERROR__NONE;
- for (i = 0; i < ras_block_count; i++) {
+ for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
struct ras_common_if head = {
.block = i,
.type = default_ras_type,
.sub_block_index = 0,
};
- strcpy(head.name, ras_block_str(i));
+
+ if (i == AMDGPU_RAS_BLOCK__MCA)
+ continue;
+
+ if (bypass) {
+ /*
+ * bypass psp. vbios enable ras for us.
+ * so just create the obj
+ */
+ if (__amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ } else {
+ if (amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ }
+ }
+
+ for (i = 0; i < AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__MCA,
+ .type = default_ras_type,
+ .sub_block_index = i,
+ };
+
if (bypass) {
/*
* bypass psp. vbios enable ras for us.
@@ -756,102 +1049,626 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
}
/* feature ctl end */
-/* query/inject/cure begin */
-int amdgpu_ras_error_query(struct amdgpu_device *adev,
- struct ras_query_if *info)
+static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
- struct ras_err_data err_data = {0, 0, 0, NULL};
- int i;
-
- if (!obj)
+ if (!block_obj)
return -EINVAL;
- switch (info->head.block) {
- case AMDGPU_RAS_BLOCK__UMC:
- if (adev->umc.funcs->query_ras_error_count)
- adev->umc.funcs->query_ras_error_count(adev, &err_data);
+ if (block_obj->ras_comm.block == block)
+ return 0;
+
+ return -EINVAL;
+}
+
+static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t sub_block_index)
+{
+ struct amdgpu_ras_block_list *node, *tmp;
+ struct amdgpu_ras_block_object *obj;
+
+ if (block >= AMDGPU_RAS_BLOCK__LAST)
+ return NULL;
+
+ list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+ if (!node->ras_obj) {
+ dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+ continue;
+ }
+
+ obj = node->ras_obj;
+ if (obj->ras_block_match) {
+ if (obj->ras_block_match(obj, block, sub_block_index) == 0)
+ return obj;
+ } else {
+ if (amdgpu_ras_block_match_default(obj, block) == 0)
+ return obj;
+ }
+ }
+
+ return NULL;
+}
+
+static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ /*
+ * choosing right query method according to
+ * whether smu support query error information
+ */
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
+ if (ret == -EOPNOTSUPP) {
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
+
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
- if (adev->umc.funcs->query_ras_error_address)
- adev->umc.funcs->query_ras_error_address(adev, &err_data);
- break;
- case AMDGPU_RAS_BLOCK__SDMA:
- if (adev->sdma.funcs->query_ras_error_count) {
- for (i = 0; i < adev->sdma.num_instances; i++)
- adev->sdma.funcs->query_ras_error_count(adev, i,
- &err_data);
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
+ } else if (!ret) {
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
+
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address)
+ adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
+ }
+}
+
+static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
+ struct ras_manager *ras_mgr,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
+ const char *blk_name,
+ bool is_ue,
+ bool is_de)
+{
+ struct amdgpu_smuio_mcm_config_info *mcm_info;
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+ u64 event_id = qctx->evid.event_id;
+
+ if (is_ue) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ue_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new uncorrectable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ue_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld uncorrectable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
+ }
+
+ } else {
+ if (is_de) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->de_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new deferred hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->de_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld deferred hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->de_count, blk_name);
+ }
+ } else {
+ if (adev->debug_disable_ce_logs)
+ return;
+
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ce_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new correctable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ce_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld correctable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->ce_count, blk_name);
+ }
+ }
+ }
+}
+
+static inline bool err_data_has_source_info(struct ras_err_data *data)
+{
+ return !list_empty(&data->err_node_list);
+}
+
+static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
+ struct ras_query_if *query_if,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head);
+ const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->evid.event_id;
+
+ if (err_data->ce_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, false, false);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld correctable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ce_count,
+ blk_name);
+ } else {
+ RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ce_count,
+ blk_name);
}
- break;
- case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.funcs->query_ras_error_count)
- adev->gfx.funcs->query_ras_error_count(adev, &err_data);
- break;
- case AMDGPU_RAS_BLOCK__MMHUB:
- if (adev->mmhub.funcs->query_ras_error_count)
- adev->mmhub.funcs->query_ras_error_count(adev, &err_data);
- break;
- case AMDGPU_RAS_BLOCK__PCIE_BIF:
- if (adev->nbio.funcs->query_ras_error_count)
- adev->nbio.funcs->query_ras_error_count(adev, &err_data);
- break;
- case AMDGPU_RAS_BLOCK__XGMI_WAFL:
- amdgpu_xgmi_query_ras_error_count(adev, &err_data);
- break;
- default:
- break;
}
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
+ if (err_data->ue_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, true, false);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ue_count,
+ blk_name);
+ } else {
+ RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ue_count,
+ blk_name);
+ }
+ }
- info->ue_count = obj->err_data.ue_count;
- info->ce_count = obj->err_data.ce_count;
+ if (err_data->de_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, false, true);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld deferred hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.de_count,
+ blk_name);
+ } else {
+ RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.de_count,
+ blk_name);
+ }
+ }
+}
- if (err_data.ce_count) {
- dev_info(adev->dev, "%ld correctable hardware errors "
- "detected in %s block, no user "
- "action is needed.\n",
- obj->err_data.ce_count,
- ras_block_str(info->head.block));
+static void amdgpu_ras_virt_error_generate_report(struct amdgpu_device *adev,
+ struct ras_query_if *query_if,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ unsigned long new_ue, new_ce, new_de;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &query_if->head);
+ const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->evid.event_id;
+
+ new_ce = err_data->ce_count - obj->err_data.ce_count;
+ new_ue = err_data->ue_count - obj->err_data.ue_count;
+ new_de = err_data->de_count - obj->err_data.de_count;
+
+ if (new_ce) {
+ RAS_EVENT_LOG(adev, event_id, "%lu correctable hardware errors "
+ "detected in %s block\n",
+ new_ce,
+ blk_name);
}
- if (err_data.ue_count) {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
- "detected in %s block\n",
- obj->err_data.ue_count,
- ras_block_str(info->head.block));
+
+ if (new_ue) {
+ RAS_EVENT_LOG(adev, event_id, "%lu uncorrectable hardware errors "
+ "detected in %s block\n",
+ new_ue,
+ blk_name);
+ }
+
+ if (new_de) {
+ RAS_EVENT_LOG(adev, event_id, "%lu deferred hardware errors "
+ "detected in %s block\n",
+ new_de,
+ blk_name);
+ }
+}
+
+static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
+{
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+
+ if (err_data_has_source_info(err_data)) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ amdgpu_ras_error_statistic_de_count(&obj->err_data,
+ &err_info->mcm_info, err_info->de_count);
+ amdgpu_ras_error_statistic_ce_count(&obj->err_data,
+ &err_info->mcm_info, err_info->ce_count);
+ amdgpu_ras_error_statistic_ue_count(&obj->err_data,
+ &err_info->mcm_info, err_info->ue_count);
+ }
+ } else {
+ /* for legacy asic path which doesn't has error source info */
+ obj->err_data.ue_count += err_data->ue_count;
+ obj->err_data.ce_count += err_data->ce_count;
+ obj->err_data.de_count += err_data->de_count;
}
+}
+
+static void amdgpu_ras_mgr_virt_error_data_statistics_update(struct ras_manager *obj,
+ struct ras_err_data *err_data)
+{
+ /* Host reports absolute counts */
+ obj->err_data.ue_count = err_data->ue_count;
+ obj->err_data.ce_count = err_data->ce_count;
+ obj->err_data.de_count = err_data->de_count;
+}
+
+static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
+{
+ struct ras_common_if head;
+
+ memset(&head, 0, sizeof(head));
+ head.block = blk;
+
+ return amdgpu_ras_find_obj(adev, &head);
+}
+
+int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ const struct aca_info *aca_info, void *data)
+{
+ struct ras_manager *obj;
+
+ /* in resume phase, no need to create aca fs node */
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
+ return 0;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ return amdgpu_aca_add_handle(adev, &obj->aca_handle, ras_block_str(blk), aca_info, data);
+}
+
+int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
+{
+ struct ras_manager *obj;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ amdgpu_aca_remove_handle(&obj->aca_handle);
return 0;
}
-/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
- struct ta_ras_trigger_error_input *block_info)
+static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ struct ras_manager *obj;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx);
+}
+
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+ struct aca_handle *handle, char *buf, void *data)
{
+ struct ras_manager *obj = container_of(handle, struct ras_manager, aca_handle);
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ if (!amdgpu_ras_get_error_query_ready(obj->adev))
+ return sysfs_emit(buf, "Query currently inaccessible\n");
+
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
+ return -EINVAL;
+
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count, "de", info.de_count);
+}
+
+static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
+ struct ras_query_if *info,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
+ unsigned int error_query_mode)
+{
+ enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
+ struct amdgpu_ras_block_object *block_obj = NULL;
int ret;
- if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
- dev_warn(adev->dev, "Failed to disallow df cstate");
+ if (blk == AMDGPU_RAS_BLOCK_COUNT)
+ return -EINVAL;
- if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
- dev_warn(adev->dev, "Failed to disallow XGMI power down");
+ if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
+ return -EINVAL;
- ret = psp_ras_trigger_error(&adev->psp, block_info);
+ if (error_query_mode == AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ return amdgpu_virt_req_ras_err_count(adev, blk, err_data);
+ } else if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
+ amdgpu_ras_get_ecc_info(adev, err_data);
+ } else {
+ block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
- if (amdgpu_ras_intr_triggered())
+ if (block_obj->hw_ops->query_ras_error_count)
+ block_obj->hw_ops->query_ras_error_count(adev, err_data);
+
+ if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+ }
+ }
+ } else {
+ if (amdgpu_aca_is_enabled(adev)) {
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx);
+ if (ret)
+ return ret;
+ } else {
+ /* FIXME: add code to check return value later */
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx);
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx);
+ }
+ }
+
+ return 0;
+}
+
+/* query/inject/cure begin */
+static int amdgpu_ras_query_error_status_with_event(struct amdgpu_device *adev,
+ struct ras_query_if *info,
+ enum ras_event_type type)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_err_data err_data;
+ struct ras_query_context qctx;
+ unsigned int error_query_mode;
+ int ret;
+
+ if (!obj)
+ return -EINVAL;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode))
+ return -EINVAL;
+
+ memset(&qctx, 0, sizeof(qctx));
+ qctx.evid.type = type;
+ qctx.evid.event_id = amdgpu_ras_acquire_event_id(adev, type);
+
+ if (!down_read_trylock(&adev->reset_domain->sem)) {
+ ret = -EIO;
+ goto out_fini_err_data;
+ }
+
+ ret = amdgpu_ras_query_error_status_helper(adev, info,
+ &err_data,
+ &qctx,
+ error_query_mode);
+ up_read(&adev->reset_domain->sem);
+ if (ret)
+ goto out_fini_err_data;
+
+ if (error_query_mode != AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
+ amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
+ } else {
+ /* Host provides absolute error counts. First generate the report
+ * using the previous VF internal count against new host count.
+ * Then Update VF internal count.
+ */
+ amdgpu_ras_virt_error_generate_report(adev, info, &err_data, &qctx);
+ amdgpu_ras_mgr_virt_error_data_statistics_update(obj, &err_data);
+ }
+
+ info->ue_count = obj->err_data.ue_count;
+ info->ce_count = obj->err_data.ce_count;
+ info->de_count = obj->err_data.de_count;
+
+out_fini_err_data:
+ amdgpu_ras_error_data_fini(&err_data);
+
+ return ret;
+}
+
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev)
+{
+ struct ras_cmd_dev_handle req = {0};
+ int ret;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__CLEAR_BAD_PAGE_INFO,
+ &req, sizeof(req), NULL, 0);
+ if (ret) {
+ dev_err(adev->dev, "Failed to clear bad pages info, ret: %d\n", ret);
return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_uniras_query_block_ecc(struct amdgpu_device *adev,
+ struct ras_query_if *info)
+{
+ struct ras_cmd_block_ecc_info_req req = {0};
+ struct ras_cmd_block_ecc_info_rsp rsp = {0};
+ int ret;
- if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
- dev_warn(adev->dev, "Failed to allow XGMI power down");
+ if (!info)
+ return -EINVAL;
- if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
- dev_warn(adev->dev, "Failed to allow df cstate");
+ req.block_id = info->head.block;
+ req.subblock_id = info->head.sub_block_index;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BLOCK_ECC_STATUS,
+ &req, sizeof(req), &rsp, sizeof(rsp));
+ if (!ret) {
+ info->ce_count = rsp.ce_count;
+ info->ue_count = rsp.ue_count;
+ info->de_count = rsp.de_count;
+ }
return ret;
}
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
+{
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_query_block_ecc(adev, info);
+ else
+ return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID);
+}
+
+int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ ras_block_str(block));
+ return -EOPNOTSUPP;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, block) ||
+ !amdgpu_ras_get_aca_debug_mode(adev))
+ return -EOPNOTSUPP;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EOPNOTSUPP;
+
+ /* skip ras error reset in gpu reset */
+ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) &&
+ ((smu_funcs && smu_funcs->set_debug_mode) ||
+ (mca_funcs && mca_funcs->mca_set_debug_mode)))
+ return -EOPNOTSUPP;
+
+ if (block_obj->hw_ops->reset_ras_error_count)
+ block_obj->hw_ops->reset_ras_error_count(adev);
+
+ return 0;
+}
+
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+
+ if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP)
+ return 0;
+
+ if ((block == AMDGPU_RAS_BLOCK__GFX) ||
+ (block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->reset_ras_error_status)
+ block_obj->hw_ops->reset_ras_error_status(adev);
+ }
+
+ return 0;
+}
+
+static int amdgpu_uniras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info)
+{
+ struct ras_cmd_inject_error_req inject_req;
+ struct ras_cmd_inject_error_rsp rsp;
+
+ if (!info)
+ return -EINVAL;
+
+ memset(&inject_req, 0, sizeof(inject_req));
+ inject_req.block_id = info->head.block;
+ inject_req.subblock_id = info->head.sub_block_index;
+ inject_req.address = info->address;
+ inject_req.error_type = info->head.type;
+ inject_req.instance_mask = info->instance_mask;
+ inject_req.method = info->value;
+
+ return amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__INJECT_ERROR,
+ &inject_req, sizeof(inject_req), &rsp, sizeof(rsp));
+}
+
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -864,71 +1681,151 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
.address = info->address,
.value = info->value,
};
- int ret = 0;
+ int ret = -EINVAL;
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev,
+ info->head.block,
+ info->head.sub_block_index);
+
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_error_inject(adev, info);
+
+ /* inject on guest isn't allowed, return success directly */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
if (!obj)
return -EINVAL;
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
/* Calculate XGMI relative offset */
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (adev->gmc.xgmi.num_physical_nodes > 1 &&
+ info->head.block != AMDGPU_RAS_BLOCK__GFX) {
block_info.address =
amdgpu_xgmi_get_relative_phy_addr(adev,
block_info.address);
}
- switch (info->head.block) {
- case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.funcs->ras_error_inject)
- ret = adev->gfx.funcs->ras_error_inject(adev, info);
- else
- ret = -EINVAL;
- break;
- case AMDGPU_RAS_BLOCK__UMC:
- case AMDGPU_RAS_BLOCK__MMHUB:
- case AMDGPU_RAS_BLOCK__PCIE_BIF:
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
- break;
- case AMDGPU_RAS_BLOCK__XGMI_WAFL:
- ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
- break;
- default:
- dev_info(adev->dev, "%s error injection is not supported yet\n",
- ras_block_str(info->head.block));
- ret = -EINVAL;
+ if (block_obj->hw_ops->ras_error_inject) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
+ ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
+ else /* Special ras_error_inject is defined (e.g: xgmi) */
+ ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
+ info->instance_mask);
+ } else {
+ /* default path */
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
}
- amdgpu_ras_parse_status_code(adev,
- "inject",
- ras_block_str(info->head.block),
- (enum ta_ras_status)ret);
+ if (ret)
+ dev_err(adev->dev, "ras inject %s failed %d\n",
+ get_ras_block_str(&info->head), ret);
return ret;
}
-/* get the total error counts on all IPs */
-unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
- bool is_ce)
+/**
+ * amdgpu_ras_query_error_count_helper -- Get error counter for specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctible errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectible errors.
+ * @query_info: pointer to ras_query_if
+ *
+ * Return 0 for query success or do nothing, otherwise return an error
+ * on failures
+ */
+static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info)
+{
+ int ret;
+
+ if (!query_info)
+ /* do nothing if query_info is not specified */
+ return 0;
+
+ ret = amdgpu_ras_query_error_status(adev, query_info);
+ if (ret)
+ return ret;
+
+ *ce_count += query_info->ce_count;
+ *ue_count += query_info->ue_count;
+
+ /* some hardware/IP supports read to clear
+ * no need to explictly reset the err status after the query call */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(adev, query_info->head.block))
+ dev_warn(adev->dev,
+ "Failed to reset error counter and error status\n");
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ras_query_error_count -- Get error counts of all IPs or specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctible errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectible
+ * errors.
+ * @query_info: pointer to ras_query_if if the query request is only for
+ * specific ip block; if info is NULL, then the qurey request is for
+ * all the ip blocks that support query ras error counters/status
+ *
+ * If set, @ce_count or @ue_count, count and return the corresponding
+ * error counts in those integer pointers. Return 0 if the device
+ * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS.
+ */
+int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- struct ras_err_data data = {0, 0};
+ unsigned long ce, ue;
+ int ret;
- if (!con)
- return 0;
+ if (!adev->ras_enabled || !con)
+ return -EOPNOTSUPP;
- list_for_each_entry(obj, &con->head, node) {
- struct ras_query_if info = {
- .head = obj->head,
- };
+ /* Don't count since no reporting.
+ */
+ if (!ce_count && !ue_count)
+ return 0;
- if (amdgpu_ras_error_query(adev, &info))
- return 0;
+ ce = 0;
+ ue = 0;
+ if (!query_info) {
+ /* query all the ip blocks that support ras query interface */
+ list_for_each_entry(obj, &con->head, node) {
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
- data.ce_count += info.ce_count;
- data.ue_count += info.ue_count;
+ ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, &info);
+ }
+ } else {
+ /* query specific ip block */
+ ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, query_info);
}
- return is_ce ? data.ce_count : data.ue_count;
+ if (ret)
+ return ret;
+
+ if (ce_count)
+ *ce_count = ce;
+
+ if (ue_count)
+ *ue_count = ue;
+
+ return 0;
}
/* query/inject/cure end */
@@ -936,7 +1833,9 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* sysfs begin */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
- struct ras_badpage **bps, unsigned int *count);
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
@@ -982,7 +1881,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
*/
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
- struct kobject *kobj, struct bin_attribute *attr,
+ struct kobject *kobj, const struct bin_attribute *attr,
char *buf, loff_t ppos, size_t count)
{
struct amdgpu_ras *con =
@@ -994,19 +1893,50 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
unsigned int end = div64_ul(ppos + count - 1, element_size);
ssize_t s = 0;
struct ras_badpage *bps = NULL;
- unsigned int bps_count = 0;
+ int bps_count = 0, i, status;
+ uint64_t address;
memset(buf, 0, count);
- if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+ bps_count = end - start;
+ bps = kmalloc_array(bps_count, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
return 0;
- for (; start < end && start < bps_count; start++)
+ memset(bps, 0, sizeof(*bps) * bps_count);
+
+ if (amdgpu_uniras_enabled(adev))
+ bps_count = amdgpu_uniras_badpages_read(adev, bps, bps_count, start);
+ else
+ bps_count = amdgpu_ras_badpages_read(adev, bps, bps_count, start);
+
+ if (bps_count <= 0) {
+ kfree(bps);
+ return 0;
+ }
+
+ for (i = 0; i < bps_count; i++) {
+ address = ((uint64_t)bps[i].bp) << AMDGPU_GPU_PAGE_SHIFT;
+ if (amdgpu_ras_check_critical_address(adev, address))
+ continue;
+
+ bps[i].size = AMDGPU_GPU_PAGE_SIZE;
+
+ status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
+ address);
+ if (status == -EBUSY)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+ else if (status == -ENOENT)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+ else
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED;
+
s += scnprintf(&buf[s], element_size + 1,
"0x%08x : 0x%08x : %1s\n",
- bps[start].bp,
- bps[start].size,
- amdgpu_ras_badpage_flags_str(bps[start].flags));
+ bps[i].bp,
+ bps[i].size,
+ amdgpu_ras_badpage_flags_str(bps[i].flags));
+ }
kfree(bps);
@@ -1019,23 +1949,106 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, features_attr);
- return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
+ return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
+}
+
+static bool amdgpu_ras_get_version_info(struct amdgpu_device *adev, u32 *major,
+ u32 *minor, u32 *rev)
+{
+ int i;
+
+ if (!adev || !major || !minor || !rev || !amdgpu_uniras_enabled(adev))
+ return false;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_RAS) {
+ *major = adev->ip_blocks[i].version->major;
+ *minor = adev->ip_blocks[i].version->minor;
+ *rev = adev->ip_blocks[i].version->rev;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, version_attr);
+ u32 major, minor, rev;
+ ssize_t size = 0;
+
+ size += sysfs_emit_at(buf, size, "table version: 0x%x\n",
+ con->eeprom_control.tbl_hdr.version);
+
+ if (amdgpu_ras_get_version_info(con->adev, &major, &minor, &rev))
+ size += sysfs_emit_at(buf, size, "ras version: %u.%u.%u\n",
+ major, minor, rev);
+
+ return size;
+}
+
+static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, schema_attr);
+ return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
+}
+
+static struct {
+ enum ras_event_type type;
+ const char *name;
+} dump_event[] = {
+ {RAS_EVENT_TYPE_FATAL, "Fatal Error"},
+ {RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"},
+ {RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"},
+};
+
+static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, event_state_attr);
+ struct ras_event_manager *event_mgr = con->event_mgr;
+ struct ras_event_state *event_state;
+ int i, size = 0;
+
+ if (!event_mgr)
+ return -EINVAL;
+
+ size += sysfs_emit_at(buf, size, "current seqno: %llu\n", atomic64_read(&event_mgr->seqno));
+ for (i = 0; i < ARRAY_SIZE(dump_event); i++) {
+ event_state = &event_mgr->event_state[dump_event[i].type];
+ size += sysfs_emit_at(buf, size, "%s: count:%llu, last_seqno:%llu\n",
+ dump_event[i].name,
+ atomic64_read(&event_state->count),
+ event_state->last_seqno);
+ }
+
+ return (ssize_t)size;
}
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- sysfs_remove_file_from_group(&adev->dev->kobj,
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
&con->badpages_attr.attr,
RAS_FS_NAME);
}
-static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
+static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct attribute *attrs[] = {
&con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
+ &con->event_state_attr.attr,
NULL
};
struct attribute_group group = {
@@ -1043,24 +2056,30 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
.attrs = attrs,
};
- sysfs_remove_group(&adev->dev->kobj, &group);
+ if (adev->dev->kobj.sd)
+ sysfs_remove_group(&adev->dev->kobj, &group);
return 0;
}
int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
- struct ras_fs_if *head)
+ struct ras_common_if *head)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+ if (amdgpu_aca_is_enabled(adev))
+ return 0;
if (!obj || obj->attr_inuse)
return -EINVAL;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block))
+ return 0;
+
get_obj(obj);
- memcpy(obj->fs_data.sysfs_name,
- head->sysfs_name,
- sizeof(obj->fs_data.sysfs_name));
+ snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
+ "%s_err_count", head->name);
obj->sysfs_attr = (struct device_attribute){
.attr = {
@@ -1088,10 +2107,14 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ if (amdgpu_aca_is_enabled(adev))
+ return 0;
+
if (!obj || !obj->attr_inuse)
return -EINVAL;
- sysfs_remove_file_from_group(&adev->dev->kobj,
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
&obj->sysfs_attr.attr,
RAS_FS_NAME);
obj->attr_inuse = 0;
@@ -1112,7 +2135,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
if (amdgpu_bad_page_threshold != 0)
amdgpu_ras_sysfs_remove_bad_page_node(adev);
- amdgpu_ras_sysfs_remove_feature_node(adev);
+ amdgpu_ras_sysfs_remove_dev_attr_node(adev);
return 0;
}
@@ -1137,16 +2160,29 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
*
*/
/* debugfs begin */
-static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
+static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct drm_minor *minor = adev_to_drm(adev)->primary;
-
- con->dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
- debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
- adev, &amdgpu_ras_debugfs_ctrl_ops);
- debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
- adev, &amdgpu_ras_debugfs_eeprom_ops);
+ struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control;
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *dir;
+
+ dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
+ debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_ctrl_ops);
+ debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_ops);
+ debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
+ &con->bad_page_cnt_threshold);
+ debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs);
+ debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
+ debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
+ debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_size_ops);
+ con->de_ras_eeprom_table = debugfs_create_file("ras_eeprom_table",
+ S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_table_ops);
+ amdgpu_ras_debugfs_set_ret_size(&con->eeprom_control);
/*
* After one uncorrectable error happens, usually GPU recovery will
@@ -1156,24 +2192,24 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
* ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
* will never be called.
*/
- debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
- &con->reboot);
+ debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, dir, &con->reboot);
/*
* User could set this not to clean up hardware's error count register
* of RAS IPs during ras recovery.
*/
- debugfs_create_bool("disable_ras_err_cnt_harvest", 0644,
- con->dir, &con->disable_ras_err_cnt_harvest);
+ debugfs_create_bool("disable_ras_err_cnt_harvest", 0644, dir,
+ &con->disable_ras_err_cnt_harvest);
+ return dir;
}
static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
- struct ras_fs_if *head)
+ struct ras_fs_if *head,
+ struct dentry *dir)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
- if (!obj || obj->ent)
+ if (!obj || !dir)
return;
get_obj(obj);
@@ -1182,14 +2218,32 @@ static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
head->debugfs_name,
sizeof(obj->fs_data.debugfs_name));
- obj->ent = debugfs_create_file(obj->fs_data.debugfs_name,
- S_IWUGO | S_IRUGO, con->dir, obj,
- &amdgpu_ras_debugfs_ops);
+ debugfs_create_file(obj->fs_data.debugfs_name, S_IWUGO | S_IRUGO, dir,
+ obj, &amdgpu_ras_debugfs_ops);
+}
+
+static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
+{
+ bool ret;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ return ret;
}
void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct dentry *dir;
struct ras_manager *obj;
struct ras_fs_if fs_info;
@@ -1200,49 +2254,39 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)
return;
- amdgpu_ras_debugfs_create_ctrl_node(adev);
+ dir = amdgpu_ras_debugfs_create_ctrl_node(adev);
list_for_each_entry(obj, &con->head, node) {
if (amdgpu_ras_is_supported(adev, obj->head.block) &&
(obj->attr_inuse == 1)) {
sprintf(fs_info.debugfs_name, "%s_err_inject",
- ras_block_str(obj->head.block));
+ get_ras_block_str(&obj->head));
fs_info.head = obj->head;
- amdgpu_ras_debugfs_create(adev, &fs_info);
+ amdgpu_ras_debugfs_create(adev, &fs_info, dir);
}
}
-}
-
-static void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
- struct ras_common_if *head)
-{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
- if (!obj || !obj->ent)
- return;
-
- obj->ent = NULL;
- put_obj(obj);
-}
-
-static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
-{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct ras_manager *obj, *tmp;
-
- list_for_each_entry_safe(obj, tmp, &con->head, node) {
- amdgpu_ras_debugfs_remove(adev, &obj->head);
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_smu_debugfs_init(adev, dir);
+ else
+ amdgpu_mca_smu_debugfs_init(adev, dir);
}
-
- con->dir = NULL;
}
+
/* debugfs end */
/* ras fs */
-static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
- amdgpu_ras_sysfs_badpages_read, NULL, 0);
+static const BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
+ amdgpu_ras_sysfs_badpages_read, NULL, 0);
static DEVICE_ATTR(features, S_IRUGO,
amdgpu_ras_sysfs_features_read, NULL);
+static DEVICE_ATTR(version, 0444,
+ amdgpu_ras_sysfs_version_show, NULL);
+static DEVICE_ATTR(schema, 0444,
+ amdgpu_ras_sysfs_schema_show, NULL);
+static DEVICE_ATTR(event_state, 0444,
+ amdgpu_ras_sysfs_event_state_show, NULL);
static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1251,26 +2295,41 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
};
struct attribute *attrs[] = {
&con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
+ &con->event_state_attr.attr,
NULL
};
- struct bin_attribute *bin_attrs[] = {
+ const struct bin_attribute *bin_attrs[] = {
NULL,
NULL,
};
int r;
+ group.attrs = attrs;
+
/* add features entry */
con->features_attr = dev_attr_features;
- group.attrs = attrs;
sysfs_attr_init(attrs[0]);
+ /* add version entry */
+ con->version_attr = dev_attr_version;
+ sysfs_attr_init(attrs[1]);
+
+ /* add schema entry */
+ con->schema_attr = dev_attr_schema;
+ sysfs_attr_init(attrs[2]);
+
+ /* add event_state entry */
+ con->event_state_attr = dev_attr_event_state;
+ sysfs_attr_init(attrs[3]);
+
if (amdgpu_bad_page_threshold != 0) {
/* add bad_page_features entry */
- bin_attr_gpu_vram_bad_pages.private = NULL;
con->badpages_attr = bin_attr_gpu_vram_bad_pages;
+ sysfs_bin_attr_init(&con->badpages_attr);
bin_attrs[0] = &con->badpages_attr;
group.bin_attrs = bin_attrs;
- sysfs_bin_attr_init(bin_attrs[0]);
}
r = sysfs_create_group(&adev->dev->kobj, &group);
@@ -1282,20 +2341,179 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
{
- if (IS_ENABLED(CONFIG_DEBUG_FS))
- amdgpu_ras_debugfs_remove_all(adev);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *con_obj, *ip_obj, *tmp;
+
+ if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+ list_for_each_entry_safe(con_obj, tmp, &con->head, node) {
+ ip_obj = amdgpu_ras_find_obj(adev, &con_obj->head);
+ if (ip_obj)
+ put_obj(ip_obj);
+ }
+ }
+
amdgpu_ras_sysfs_remove_all(adev);
return 0;
}
/* ras fs end */
/* ih begin */
+
+/* For the hardware that cannot enable bif ring for both ras_controller_irq
+ * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
+ * register to check whether the interrupt is triggered or not, and properly
+ * ack the interrupt if it is there
+ */
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
+{
+ /* Fatal error events are handled on host side */
+ if (amdgpu_sriov_vf(adev))
+ return;
+ /*
+ * If the current interrupt is caused by a non-fatal RAS error, skip
+ * check for fatal error. For fatal errors, FED status of all devices
+ * in XGMI hive gets set when the first device gets fatal error
+ * interrupt. The error gets propagated to other devices as well, so
+ * make sure to ack the interrupt regardless of FED status.
+ */
+ if (!amdgpu_ras_get_fed_status(adev) &&
+ amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
+ return;
+
+ if (amdgpu_uniras_enabled(adev)) {
+ amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL);
+ return;
+ }
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
+}
+
+static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ bool poison_stat = false;
+ struct amdgpu_device *adev = obj->adev;
+ struct amdgpu_ras_block_object *block_obj =
+ amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int ret;
+
+ if (!block_obj || !con)
+ return;
+
+ ret = amdgpu_ras_mark_ras_event(adev, type);
+ if (ret)
+ return;
+
+ amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block);
+ /* both query_poison_status and handle_poison_consumption are optional,
+ * but at least one of them should be implemented if we need poison
+ * consumption handler
+ */
+ if (block_obj->hw_ops && block_obj->hw_ops->query_poison_status) {
+ poison_stat = block_obj->hw_ops->query_poison_status(adev);
+ if (!poison_stat) {
+ /* Not poison consumption interrupt, no need to handle it */
+ dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
+ block_obj->ras_comm.name);
+
+ return;
+ }
+ }
+
+ amdgpu_umc_poison_handler(adev, obj->head.block, 0);
+
+ if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
+ poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
+
+ /* gpu reset is fallback for failed and default cases.
+ * For RMA case, amdgpu_umc_poison_handler will handle gpu reset.
+ */
+ if (poison_stat && !amdgpu_ras_is_rma(adev)) {
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id,
+ "GPU reset for %s RAS poison consumption is issued!\n",
+ block_obj->ras_comm.name);
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ if (!poison_stat)
+ amdgpu_gfx_poison_consumption_handler(adev, entry);
+}
+
+static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_device *adev = obj->adev;
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
+ u64 event_id;
+ int ret;
+
+ ret = amdgpu_ras_mark_ras_event(adev, type);
+ if (ret)
+ return;
+
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id, "Poison is created\n");
+
+ if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
+
+ atomic_inc(&con->page_retirement_req_cnt);
+ atomic_inc(&con->poison_creation_count);
+
+ wake_up(&con->page_retirement_wq);
+ }
+}
+
+static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_ih_data *data = &obj->ih_data;
+ struct ras_err_data err_data;
+ int ret;
+
+ if (!data->cb)
+ return;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return;
+
+ /* Let IP handle its data, maybe we need get the output
+ * from the callback to update the error type/count, etc
+ */
+ amdgpu_ras_set_fed(obj->adev, true);
+ ret = data->cb(obj->adev, &err_data, entry);
+ /* ue will trigger an interrupt, and in that case
+ * we need do a reset to recovery the whole system.
+ * But leave IP do that recovery, here we just dispatch
+ * the error.
+ */
+ if (ret == AMDGPU_RAS_SUCCESS) {
+ /* these counts could be left as 0 if
+ * some blocks do not count error number
+ */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ obj->err_data.de_count += err_data.de_count;
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+}
+
static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
{
struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry;
- int ret;
- struct ras_err_data err_data = {0, 0, 0, NULL};
while (data->rptr != data->wptr) {
rmb();
@@ -1306,23 +2524,17 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
data->rptr = (data->aligned_element_size +
data->rptr) % data->ring_size;
- /* Let IP handle its data, maybe we need get the output
- * from the callback to udpate the error type/count, etc
- */
- if (data->cb) {
- ret = data->cb(obj->adev, &err_data, &entry);
- /* ue will trigger an interrupt, and in that case
- * we need do a reset to recovery the whole system.
- * But leave IP do that recovery, here we just dispatch
- * the error.
- */
- if (ret == AMDGPU_RAS_SUCCESS) {
- /* these counts could be left as 0 if
- * some blocks do not count error number
- */
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
- }
+ if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
+ else
+ amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry);
+ } else {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_umc_handler(obj, &entry);
+ else
+ dev_warn(obj->adev->dev,
+ "No RAS interrupt handler for non-UMC block with poison disabled.\n");
}
}
}
@@ -1340,12 +2552,25 @@ static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
- struct ras_ih_data *data = &obj->ih_data;
+ struct ras_manager *obj;
+ struct ras_ih_data *data;
+ if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info;
+
+ memset(&ih_info, 0, sizeof(ih_info));
+ ih_info.block = info->head.block;
+ memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry));
+
+ return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info);
+ }
+
+ obj = amdgpu_ras_find_obj(adev, &info->head);
if (!obj)
return -EINVAL;
+ data = &obj->ih_data;
+
if (data->inuse == 0)
return 0;
@@ -1363,9 +2588,9 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
}
int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
- struct ras_ih_if *info)
+ struct ras_common_if *head)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
struct ras_ih_data *data;
if (!obj)
@@ -1385,24 +2610,27 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
}
int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
- struct ras_ih_if *info)
+ struct ras_common_if *head)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
struct ras_ih_data *data;
+ struct amdgpu_ras_block_object *ras_obj;
if (!obj) {
/* in case we registe the IH before enable ras feature */
- obj = amdgpu_ras_create_obj(adev, &info->head);
+ obj = amdgpu_ras_create_obj(adev, head);
if (!obj)
return -EINVAL;
} else
get_obj(obj);
+ ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm);
+
data = &obj->ih_data;
/* add the callback.etc */
*data = (struct ras_ih_data) {
.inuse = 0,
- .cb = info->cb,
+ .cb = ras_obj->ras_cb,
.element_size = sizeof(struct amdgpu_iv_entry),
.rptr = 0,
.wptr = 0,
@@ -1431,10 +2659,7 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
struct ras_manager *obj, *tmp;
list_for_each_entry_safe(obj, tmp, &con->head, node) {
- struct ras_ih_if info = {
- .head = obj->head,
- };
- amdgpu_ras_interrupt_remove_handler(adev, &info);
+ amdgpu_ras_interrupt_remove_handler(adev, &obj->head);
}
return 0;
@@ -1442,12 +2667,12 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
/* ih end */
/* traversal all IPs except NBIO to query error counter */
-static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
+static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev, enum ras_event_type type)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!con)
+ if (!adev->ras_enabled || !con)
return;
list_for_each_entry(obj, &con->head, node) {
@@ -1464,7 +2689,28 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
continue;
- amdgpu_ras_error_query(adev, &info);
+ /*
+ * this is a workaround for aldebaran, skip send msg to
+ * smu to get ecc_info table due to smu handle get ecc
+ * info table failed temporarily.
+ * should be removed until smu fix handle ecc_info table.
+ */
+ if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+ (amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 2)))
+ continue;
+
+ amdgpu_ras_query_error_status_with_event(adev, &info, type);
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(11, 0, 4) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(13, 0, 0)) {
+ if (amdgpu_ras_reset_error_status(adev, info.head.block))
+ dev_warn(adev->dev, "Failed to reset error counter and error status");
+ }
}
}
@@ -1472,22 +2718,28 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
struct ras_query_if *info)
{
+ struct amdgpu_ras_block_object *block_obj;
/*
* Only two block need to query read/write
* RspStatus at current state
*/
- switch (info->head.block) {
- case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.funcs->query_ras_error_status)
- adev->gfx.funcs->query_ras_error_status(adev);
- break;
- case AMDGPU_RAS_BLOCK__MMHUB:
- if (adev->mmhub.funcs->query_ras_error_status)
- adev->mmhub.funcs->query_ras_error_status(adev);
- break;
- default:
- break;
+ if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
+ (info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ block_obj = amdgpu_ras_get_ras_block(adev,
+ info->head.block,
+ info->head.sub_block_index);
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return;
}
+
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+
}
static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
@@ -1495,7 +2747,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!con)
+ if (!adev->ras_enabled || !con)
return;
list_for_each_entry(obj, &con->head, node) {
@@ -1507,55 +2759,121 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
}
}
-/* recovery begin */
-
-/* return 0 on success.
- * caller need free bps.
- */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
- struct ras_badpage **bps, unsigned int *count)
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
- int i = 0;
- int ret = 0, status;
+ int r = 0;
+ uint32_t i;
if (!con || !con->eh_data || !bps || !count)
return -EINVAL;
mutex_lock(&con->recovery_lock);
data = con->eh_data;
- if (!data || data->count == 0) {
- *bps = NULL;
- ret = -EINVAL;
- goto out;
+ if (start < data->count) {
+ for (i = start; i < data->count; i++) {
+ if (!data->bps[i].ts)
+ continue;
+
+ bps[r].bp = data->bps[i].retired_page;
+ r++;
+ if (r >= count)
+ break;
+ }
}
+ mutex_unlock(&con->recovery_lock);
- *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
- if (!*bps) {
- ret = -ENOMEM;
- goto out;
- }
+ return r;
+}
- for (; i < data->count; i++) {
- (*bps)[i] = (struct ras_badpage){
- .bp = data->bps[i].retired_page,
- .size = AMDGPU_GPU_PAGE_SIZE,
- .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
- };
- status = amdgpu_vram_mgr_query_page_status(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
- data->bps[i].retired_page);
- if (status == -EBUSY)
- (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
- else if (status == -ENOENT)
- (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
+{
+ struct ras_cmd_bad_pages_info_req cmd_input;
+ struct ras_cmd_bad_pages_info_rsp *output;
+ uint32_t group, start_group, end_group;
+ uint32_t pos, pos_in_group;
+ int r = 0, i;
+
+ if (!bps || !count)
+ return -EINVAL;
+
+ output = kmalloc(sizeof(*output), GFP_KERNEL);
+ if (!output)
+ return -ENOMEM;
+
+ memset(&cmd_input, 0, sizeof(cmd_input));
+
+ start_group = start / RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ end_group = (start + count + RAS_CMD_MAX_BAD_PAGES_PER_GROUP - 1) /
+ RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+
+ pos = start;
+ for (group = start_group; group < end_group; group++) {
+ memset(output, 0, sizeof(*output));
+ cmd_input.group_index = group;
+ if (amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BAD_PAGES,
+ &cmd_input, sizeof(cmd_input), output, sizeof(*output)))
+ goto out;
+
+ if (pos >= output->bp_total_cnt)
+ goto out;
+
+ pos_in_group = pos - group * RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ for (i = pos_in_group; i < output->bp_in_group; i++, pos++) {
+ if (!output->records[i].ts)
+ continue;
+
+ bps[r].bp = output->records[i].retired_page;
+ r++;
+ if (r >= count)
+ goto out;
+ }
}
- *count = data->count;
out:
- mutex_unlock(&con->recovery_lock);
- return ret;
+ kfree(output);
+ return r;
+}
+
+static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive, bool status)
+{
+ struct amdgpu_device *tmp_adev;
+
+ if (hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ amdgpu_ras_set_fed(tmp_adev, status);
+ } else {
+ amdgpu_ras_set_fed(adev, status);
+ }
+}
+
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int hive_ras_recovery = 0;
+
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
+ return true;
+
+ return false;
+}
+
+static enum ras_event_type amdgpu_ras_get_fatal_error_event(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_intr_triggered())
+ return RAS_EVENT_TYPE_FATAL;
+ else
+ return RAS_EVENT_TYPE_POISON_CONSUMPTION;
}
static void amdgpu_ras_do_recovery(struct work_struct *work)
@@ -1565,9 +2883,26 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_device *remote_adev = NULL;
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle = NULL;
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ unsigned int error_query_mode;
+ enum ras_event_type type;
+
+ if (hive) {
+ atomic_set(&hive->ras_recovery, 1);
+ /* If any device which is part of the hive received RAS fatal
+ * error interrupt, set fatal error status on all. This
+ * condition will need a recovery, and flag will be cleared
+ * as part of recovery.
+ */
+ list_for_each_entry(remote_adev, &hive->device_list,
+ gmc.xgmi.head)
+ if (amdgpu_ras_get_fed_status(remote_adev)) {
+ amdgpu_ras_set_fed_all(adev, hive, true);
+ break;
+ }
+ }
if (!ras->disable_ras_err_cnt_harvest) {
- struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
/* Build list of devices to query RAS related errors */
if (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -1578,18 +2913,64 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
device_list_handle = &device_list;
}
+ if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
+ if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) {
+ /* wait 500ms to ensure pmfw polling mca bank info done */
+ msleep(500);
+ }
+ }
+
+ type = amdgpu_ras_get_fatal_error_event(adev);
list_for_each_entry(remote_adev,
device_list_handle, gmc.xgmi.head) {
- amdgpu_ras_query_err_status(remote_adev);
- amdgpu_ras_log_on_err_counter(remote_adev);
+ if (amdgpu_uniras_enabled(remote_adev)) {
+ amdgpu_ras_mgr_update_ras_ecc(remote_adev);
+ } else {
+ amdgpu_ras_query_err_status(remote_adev);
+ amdgpu_ras_log_on_err_counter(remote_adev, type);
+ }
}
- amdgpu_put_xgmi_hive(hive);
}
- if (amdgpu_device_should_recover_gpu(ras->adev))
- amdgpu_device_gpu_recover(ras->adev, NULL);
+ if (amdgpu_device_should_recover_gpu(ras->adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_RAS;
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ /* Perform full reset in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ else {
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ reset_context.method = AMD_RESET_METHOD_MODE2;
+ }
+
+ /* Fatal error occurs in poison mode, mode1 reset is used to
+ * recover gpu.
+ */
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ psp_fatal_error_recovery_quirk(&adev->psp);
+ }
+ }
+
+ amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
+ }
atomic_set(&ras->in_recovery, 0);
+ if (hive) {
+ atomic_set(&hive->ras_recovery, 0);
+ amdgpu_put_xgmi_hive(hive);
+ }
}
/* alloc/realloc bps array */
@@ -1599,10 +2980,9 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
unsigned int old_space = data->count + data->space_left;
unsigned int new_space = old_space + pages;
unsigned int align_space = ALIGN(new_space, 512);
- void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+ void *bps = kmalloc_array(align_space, sizeof(*data->bps), GFP_KERNEL);
if (!bps) {
- kfree(bps);
return -ENOMEM;
}
@@ -1617,44 +2997,283 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
return 0;
}
-/* it deal with vram only. */
-int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages)
+static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct ras_err_handler_data *data;
+ struct ta_ras_query_address_input addr_in;
+ uint32_t socket = 0;
int ret = 0;
- uint32_t i;
- if (!con || !con->eh_data || !bps || pages <= 0)
- return 0;
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
- mutex_lock(&con->recovery_lock);
- data = con->eh_data;
- if (!data)
- goto out;
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.socket_id = socket;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ /* tell RAS TA the node instance is not used */
+ addr_in.ma.node_inst = TA_RAS_INV_NODE;
+ } else {
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = bps->cu;
+ }
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+
+ return ret;
+}
+
+static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
+{
+ struct ta_ras_query_address_input addr_in;
+ uint32_t die_id, socket = 0;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
+
+ /* although die id is gotten from PA in nps1 mode, the id is
+ * fitable for any nps mode
+ */
+ if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa)
+ die_id = adev->umc.ras->get_die_id_from_pa(adev, bps->address,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT);
+ else
+ return -EINVAL;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = die_id;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+ else
+ return -EINVAL;
+}
+
+static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int count)
+{
+ int j;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data = con->eh_data;
- for (i = 0; i < pages; i++) {
+ for (j = 0; j < count; j++) {
if (amdgpu_ras_check_bad_page_unlock(con,
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
+ data->count++;
+ data->space_left--;
continue;
+ }
if (!data->space_left &&
- amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
- ret = -ENOMEM;
- goto out;
+ amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
+ return -ENOMEM;
}
- amdgpu_vram_mgr_reserve_range(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
- AMDGPU_GPU_PAGE_SIZE);
+ amdgpu_ras_reserve_page(adev, bps[j].retired_page);
- memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+ memcpy(&data->bps[data->count], &(bps[j]),
+ sizeof(struct eeprom_table_record));
data->count++;
data->space_left--;
+ con->bad_page_num++;
+ }
+
+ return 0;
+}
+
+static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+
+ /*old asics just have pa in eeprom*/
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ goto out;
+ }
+
+ for (i = 0; i < adev->umc.retire_unit; i++)
+ bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+
+ if (save_nps) {
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ err_data->err_addr[i].address = bps[0].address;
+ err_data->err_addr[i].mem_channel = bps[0].mem_channel;
+ err_data->err_addr[i].bank = bps[0].bank;
+ err_data->err_addr[i].err_type = bps[0].err_type;
+ err_data->err_addr[i].mcumc_id = bps[0].mcumc_id;
+ }
+ } else {
+ if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
+ return -EINVAL;
+ }
+ } else {
+ if (bps[0].address == 0) {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps[0].address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+ }
+
+ if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) {
+ if (nps == AMDGPU_NPS1_PARTITION_MODE)
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ else
+ return -EOPNOTSUPP;
+ }
}
+
out:
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit);
+}
+
+static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+ } else {
+ /* if pmfw manages eeprom, save_nps is not stored on eeprom,
+ * we should always convert mca address into physical address,
+ * make save_nps different from nps
+ */
+ save_nps = nps + 1;
+ }
+
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ err_data->err_addr[i].address = bps->address;
+ err_data->err_addr[i].mem_channel = bps->mem_channel;
+ err_data->err_addr[i].bank = bps->bank;
+ err_data->err_addr[i].err_type = bps->err_type;
+ err_data->err_addr[i].mcumc_id = bps->mcumc_id;
+ }
+ } else {
+ if (bps->address) {
+ if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
+ return -EINVAL;
+ } else {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps->address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+
+ if (amdgpu_ras_mca2pa(adev, bps, err_data))
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
+ adev->umc.retire_unit);
+}
+
+/* it deal with vram only. */
+int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages, bool from_rom)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_data err_data;
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras_context.ras->eeprom_control;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ int ret = 0;
+ uint32_t i = 0;
+
+ if (!con || !con->eh_data || !bps || pages <= 0)
+ return 0;
+
+ if (from_rom) {
+ err_data.err_addr =
+ kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n");
+ return -ENOMEM;
+ }
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ }
+
+ mutex_lock(&con->recovery_lock);
+
+ if (from_rom) {
+ /* there is no pa recs in V3, so skip pa recs processing */
+ if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+ !amdgpu_ras_smu_eeprom_supported(adev)) {
+ for (i = 0; i < pages; i++) {
+ if (control->ras_num_recs - i >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ /* deal with retire_unit records a time */
+ ret = __amdgpu_ras_convert_rec_array_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ con->bad_page_num -= adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ for (; i < pages; i++) {
+ ret = __amdgpu_ras_convert_rec_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ con->bad_page_num -= adev->umc.retire_unit;
+ }
+
+ con->eh_data->count_saved = con->eh_data->count;
+ } else {
+ ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
+ }
+
+ if (from_rom)
+ kfree(err_data.err_addr);
mutex_unlock(&con->recovery_lock);
return ret;
@@ -1663,31 +3282,70 @@ out:
/*
* write error record array to eeprom, the function should be
* protected by recovery_lock
+ * new_cnt: new added UE count, excluding reserved bad pages, can be NULL
*/
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
- int save_count;
+ int save_count, unit_num, i;
+
+ if (!con || !con->eh_data) {
+ if (new_cnt)
+ *new_cnt = 0;
+
+ return 0;
+ }
+
+ if (!con->eeprom_control.is_eeprom_valid) {
+ dev_warn(adev->dev,
+ "Failed to save EEPROM table data because of EEPROM data corruption!");
+ if (new_cnt)
+ *new_cnt = 0;
- if (!con || !con->eh_data)
return 0;
+ }
+ mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
- save_count = data->count - control->num_recs;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ unit_num = control->ras_num_recs -
+ control->ras_num_recs_old;
+ else
+ unit_num = data->count / adev->umc.retire_unit -
+ control->ras_num_recs;
+
+ save_count = con->bad_page_num - control->ras_num_bad_pages;
+ mutex_unlock(&con->recovery_lock);
+
+ if (new_cnt)
+ *new_cnt = unit_num;
+
/* only new entries are saved */
- if (save_count > 0) {
- if (amdgpu_ras_eeprom_process_recods(control,
- &data->bps[control->num_recs],
- true,
- save_count)) {
- dev_err(adev->dev, "Failed to save EEPROM table data!");
- return -EIO;
+ if (unit_num && save_count) {
+ /*old asics only save pa to eeprom like before*/
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[data->count_saved], unit_num)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ } else {
+ for (i = 0; i < unit_num; i++) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[data->count_saved +
+ i * adev->umc.retire_unit], 1)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ }
}
dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
+ data->count_saved = data->count;
}
return 0;
@@ -1700,44 +3358,85 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
{
struct amdgpu_ras_eeprom_control *control =
- &adev->psp.ras.ras->eeprom_control;
- struct eeprom_table_record *bps = NULL;
- int ret = 0;
+ &adev->psp.ras_context.ras->eeprom_control;
+ struct eeprom_table_record *bps;
+ int ret, i = 0;
/* no bad page record, skip eeprom access */
- if (!control->num_recs || (amdgpu_bad_page_threshold == 0))
- return ret;
+ if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
+ return 0;
- bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL);
+ bps = kcalloc(control->ras_num_recs, sizeof(*bps), GFP_KERNEL);
if (!bps)
return -ENOMEM;
- if (amdgpu_ras_eeprom_process_recods(control, bps, false,
- control->num_recs)) {
+ ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
+ if (ret) {
dev_err(adev->dev, "Failed to load EEPROM table records!");
- ret = -EIO;
- goto out;
- }
+ } else {
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ /*In V3, there is no pa recs, and some cases(when address==0) may be parsed
+ as pa recs, so add verion check to avoid it.
+ */
+ if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+ !amdgpu_ras_smu_eeprom_supported(adev)) {
+ for (i = 0; i < control->ras_num_recs; i++) {
+ if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ control->ras_num_pa_recs += adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ control->ras_num_mca_recs +=
+ (control->ras_num_recs - i);
+ break;
+ }
+ } else {
+ control->ras_num_mca_recs += (control->ras_num_recs - i);
+ break;
+ }
+ }
+ } else {
+ control->ras_num_mca_recs = control->ras_num_recs;
+ }
+ }
+
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true);
+ if (ret)
+ goto out;
+
+ ret = amdgpu_ras_eeprom_check(control);
+ if (ret)
+ goto out;
- ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
+ /* HW not usable */
+ if (amdgpu_ras_is_rma(adev))
+ ret = -EHWPOISON;
+ }
out:
kfree(bps);
return ret;
}
-static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr)
{
struct ras_err_handler_data *data = con->eh_data;
+ struct amdgpu_device *adev = con->adev;
int i;
+ if ((addr >= adev->gmc.mc_vram_size &&
+ adev->gmc.mc_vram_size) ||
+ (addr >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EINVAL;
+
addr >>= AMDGPU_GPU_PAGE_SHIFT;
for (i = 0; i < data->count; i++)
if (addr == data->bps[i].retired_page)
- return true;
+ return 1;
- return false;
+ return 0;
}
/*
@@ -1745,11 +3444,11 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
*
* Note: this check is only for umc block
*/
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- bool ret = false;
+ int ret = 0;
if (!con || !con->eh_data)
return ret;
@@ -1761,87 +3460,443 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
}
static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
- uint32_t max_length)
+ uint32_t max_count)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- int tmp_threshold = amdgpu_bad_page_threshold;
- u64 val;
/*
- * Justification of value bad_page_cnt_threshold in ras structure
- *
- * Generally, -1 <= amdgpu_bad_page_threshold <= max record length
- * in eeprom, and introduce two scenarios accordingly.
- *
- * Bad page retirement enablement:
- * - If amdgpu_bad_page_threshold = -1,
- * bad_page_cnt_threshold = typical value by formula.
- *
- * - When the value from user is 0 < amdgpu_bad_page_threshold <
- * max record length in eeprom, use it directly.
- *
- * Bad page retirement disablement:
- * - If amdgpu_bad_page_threshold = 0, bad page retirement
- * functionality is disabled, and bad_page_cnt_threshold will
- * take no effect.
+ * amdgpu_bad_page_threshold is used to config
+ * the threshold for the number of bad pages.
+ * -1: Threshold is set to default value
+ * Driver will issue a warning message when threshold is reached
+ * and continue runtime services.
+ * 0: Disable bad page retirement
+ * Driver will not retire bad pages
+ * which is intended for debugging purpose.
+ * -2: Threshold is determined by a formula
+ * that assumes 1 bad page per 100M of local memory.
+ * Driver will continue runtime services when threhold is reached.
+ * 0 < threshold < max number of bad page records in EEPROM,
+ * A user-defined threshold is set
+ * Driver will halt runtime services when this custom threshold is reached.
*/
+ if (amdgpu_bad_page_threshold == -2) {
+ u64 val = adev->gmc.mc_vram_size;
- if (tmp_threshold < -1)
- tmp_threshold = -1;
- else if (tmp_threshold > max_length)
- tmp_threshold = max_length;
-
- if (tmp_threshold == -1) {
- val = adev->gmc.mc_vram_size;
- do_div(val, RAS_BAD_PAGE_RATE);
+ do_div(val, RAS_BAD_PAGE_COVER);
con->bad_page_cnt_threshold = min(lower_32_bits(val),
- max_length);
+ max_count);
+ } else if (amdgpu_bad_page_threshold == -1) {
+ con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4;
} else {
- con->bad_page_cnt_threshold = tmp_threshold;
+ con->bad_page_cnt_threshold = min_t(int, max_count,
+ amdgpu_bad_page_threshold);
}
}
-int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ int ret = 0;
+ struct ras_poison_msg poison_msg;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ memset(&poison_msg, 0, sizeof(poison_msg));
+ poison_msg.block = block;
+ poison_msg.pasid = pasid;
+ poison_msg.reset = reset;
+ poison_msg.pasid_fn = pasid_fn;
+ poison_msg.data = data;
+
+ ret = kfifo_put(&con->poison_fifo, poison_msg);
+ if (!ret) {
+ dev_err(adev->dev, "Poison message fifo is full!\n");
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
+ struct ras_poison_msg *poison_msg)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ return kfifo_get(&con->poison_fifo, poison_msg);
+}
+
+static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
+{
+ mutex_init(&ecc_log->lock);
+
+ INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+}
+
+static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ struct ras_ecc_err *ecc_err;
+
+ mutex_lock(&ecc_log->lock);
+ radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) {
+ ecc_err = radix_tree_deref_slot(slot);
+ kfree(ecc_err->err_pages.pfn);
+ kfree(ecc_err);
+ radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot);
+ }
+ mutex_unlock(&ecc_log->lock);
+
+ mutex_destroy(&ecc_log->lock);
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+}
+
+static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con,
+ uint32_t delayed_ms)
+{
+ int ret;
+
+ mutex_lock(&con->umc_ecc_log.lock);
+ ret = radix_tree_tagged(&con->umc_ecc_log.de_page_tree,
+ UMC_ECC_NEW_DETECTED_TAG);
+ mutex_unlock(&con->umc_ecc_log.lock);
+
+ if (ret)
+ schedule_delayed_work(&con->page_retirement_dwork,
+ msecs_to_jiffies(delayed_ms));
+
+ return ret ? true : false;
+}
+
+static void amdgpu_ras_do_page_retirement(struct work_struct *work)
+{
+ struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
+ page_retirement_dwork.work);
+ struct amdgpu_device *adev = con->adev;
+ struct ras_err_data err_data;
+
+ /* If gpu reset is ongoing, delay retiring the bad pages */
+ if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) {
+ amdgpu_ras_schedule_retirement_dwork(con,
+ AMDGPU_RAS_RETIRE_PAGE_INTERVAL * 3);
+ return;
+ }
+
+ amdgpu_ras_error_data_init(&err_data);
+
+ amdgpu_umc_handle_bad_pages(adev, &err_data);
+
+ amdgpu_ras_error_data_fini(&err_data);
+
+ amdgpu_ras_schedule_retirement_dwork(con,
+ AMDGPU_RAS_RETIRE_PAGE_INTERVAL);
+}
+
+static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+ uint32_t poison_creation_count)
+{
+ int ret = 0;
+ struct ras_ecc_log_info *ecc_log;
+ struct ras_query_if info;
+ u32 timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ u64 de_queried_count;
+ u64 consumption_q_count;
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
+
+ memset(&info, 0, sizeof(info));
+ info.head.block = AMDGPU_RAS_BLOCK__UMC;
+
+ ecc_log = &ras->umc_ecc_log;
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+
+ do {
+ ret = amdgpu_ras_query_error_status_with_event(adev, &info, type);
+ if (ret)
+ return ret;
+
+ de_queried_count = ecc_log->de_queried_count;
+ consumption_q_count = ecc_log->consumption_q_count;
+
+ if (de_queried_count && consumption_q_count)
+ break;
+
+ msleep(100);
+ } while (--timeout);
+
+ if (de_queried_count)
+ schedule_delayed_work(&ras->page_retirement_dwork, 0);
+
+ if (amdgpu_ras_is_rma(adev) && atomic_cmpxchg(&ras->rma_in_recovery, 0, 1) == 0)
+ amdgpu_ras_reset_gpu(adev);
+
+ return 0;
+}
+
+static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_poison_msg msg;
+ int ret;
+
+ do {
+ ret = kfifo_get(&con->poison_fifo, &msg);
+ } while (ret);
+}
+
+static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ uint32_t msg_count, uint32_t *gpu_reset)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t reset_flags = 0, reset = 0;
+ struct ras_poison_msg msg;
+ int ret, i;
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+ for (i = 0; i < msg_count; i++) {
+ ret = amdgpu_ras_get_poison_req(adev, &msg);
+ if (!ret)
+ continue;
+
+ if (msg.pasid_fn)
+ msg.pasid_fn(adev, msg.pasid, msg.data);
+
+ reset_flags |= msg.reset;
+ }
+
+ /*
+ * Try to ensure poison creation handler is completed first
+ * to set rma if bad page exceed threshold.
+ */
+ flush_delayed_work(&con->page_retirement_dwork);
+
+ /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */
+ if (reset_flags && !amdgpu_ras_is_rma(adev)) {
+ if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET)
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ else
+ reset = reset_flags;
+
+ con->gpu_reset_flags |= reset;
+ amdgpu_ras_reset_gpu(adev);
+
+ *gpu_reset = reset;
+
+ /* Wait for gpu recovery to complete */
+ flush_work(&con->recovery_work);
+ }
+
+ return 0;
+}
+
+static int amdgpu_ras_page_retirement_thread(void *param)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)param;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t poison_creation_count, msg_count;
+ uint32_t gpu_reset;
+ int ret;
+
+ while (!kthread_should_stop()) {
+
+ wait_event_interruptible(con->page_retirement_wq,
+ kthread_should_stop() ||
+ atomic_read(&con->page_retirement_req_cnt));
+
+ if (kthread_should_stop())
+ break;
+
+ mutex_lock(&con->poison_lock);
+ gpu_reset = 0;
+
+ do {
+ poison_creation_count = atomic_read(&con->poison_creation_count);
+ ret = amdgpu_ras_poison_creation_handler(adev, poison_creation_count);
+ if (ret == -EIO)
+ break;
+
+ if (poison_creation_count) {
+ atomic_sub(poison_creation_count, &con->poison_creation_count);
+ atomic_sub(poison_creation_count, &con->page_retirement_req_cnt);
+ }
+ } while (atomic_read(&con->poison_creation_count) &&
+ !atomic_read(&con->poison_consumption_count));
+
+ if (ret != -EIO) {
+ msg_count = kfifo_len(&con->poison_fifo);
+ if (msg_count) {
+ ret = amdgpu_ras_poison_consumption_handler(adev,
+ msg_count, &gpu_reset);
+ if ((ret != -EIO) &&
+ (gpu_reset != AMDGPU_RAS_GPU_RESET_MODE1_RESET))
+ atomic_sub(msg_count, &con->page_retirement_req_cnt);
+ }
+ }
+
+ if ((ret == -EIO) || (gpu_reset == AMDGPU_RAS_GPU_RESET_MODE1_RESET)) {
+ /* gpu mode-1 reset is ongoing or just completed ras mode-1 reset */
+ /* Clear poison creation request */
+ atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
+
+ /* Clear poison fifo */
+ amdgpu_ras_clear_poison_fifo(adev);
+
+ /* Clear all poison requests */
+ atomic_set(&con->page_retirement_req_cnt, 0);
+
+ if (ret == -EIO) {
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ /* Wake up work to save bad pages to eeprom */
+ schedule_delayed_work(&con->page_retirement_dwork, 0);
+ } else if (gpu_reset) {
+ /* gpu just completed mode-2 reset or other reset */
+ /* Clear poison consumption messages cached in fifo */
+ msg_count = kfifo_len(&con->poison_fifo);
+ if (msg_count) {
+ amdgpu_ras_clear_poison_fifo(adev);
+ atomic_sub(msg_count, &con->page_retirement_req_cnt);
+ }
+
+ atomic_set(&con->poison_consumption_count, 0);
+
+ /* Wake up work to save bad pages to eeprom */
+ schedule_delayed_work(&con->page_retirement_dwork, 0);
+ }
+ mutex_unlock(&con->poison_lock);
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control;
+ int ret;
+
+ if (!con || amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
+ control = &con->eeprom_control;
+ con->ras_smu_drv = amdgpu_dpm_get_ras_smu_driver(adev);
+
+ ret = amdgpu_ras_eeprom_init(control);
+ control->is_eeprom_valid = !ret;
+
+ if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+ control->ras_num_pa_recs = control->ras_num_recs;
+
+ if (adev->umc.ras &&
+ adev->umc.ras->get_retire_flip_bits)
+ adev->umc.ras->get_retire_flip_bits(adev);
+
+ if (control->ras_num_recs && control->is_eeprom_valid) {
+ ret = amdgpu_ras_load_bad_pages(adev);
+ if (ret) {
+ control->is_eeprom_valid = false;
+ return 0;
+ }
+
+ amdgpu_dpm_send_hbm_bad_pages_num(
+ adev, control->ras_num_bad_pages);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(
+ adev, control->bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
+
+ /* The format action is only applied to new ASICs */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 &&
+ control->tbl_hdr.version < RAS_TABLE_VER_V3)
+ if (!amdgpu_ras_eeprom_reset_table(control))
+ if (amdgpu_ras_save_bad_pages(adev, NULL))
+ dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n");
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data **data;
- uint32_t max_eeprom_records_len = 0;
- bool exc_err_limit = false;
+ u32 max_eeprom_records_count = 0;
int ret;
- if (con)
- data = &con->eh_data;
- else
+ if (!con || amdgpu_sriov_vf(adev))
return 0;
- *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+ /* Allow access to RAS EEPROM via debugfs, when the ASIC
+ * supports RAS and debugfs is enabled, but when
+ * adev->ras_enabled is unset, i.e. when "ras_enable"
+ * module parameter is set to 0.
+ */
+ con->adev = adev;
+
+ if (!adev->ras_enabled)
+ return 0;
+
+ data = &con->eh_data;
+ *data = kzalloc(sizeof(**data), GFP_KERNEL);
if (!*data) {
ret = -ENOMEM;
goto out;
}
mutex_init(&con->recovery_lock);
+ mutex_init(&con->poison_lock);
INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
atomic_set(&con->in_recovery, 0);
- con->adev = adev;
+ atomic_set(&con->rma_in_recovery, 0);
+ con->eeprom_control.bad_channel_bitmap = 0;
- max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length();
- amdgpu_ras_validate_threshold(adev, max_eeprom_records_len);
+ max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
+ amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
- ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
- /*
- * This calling fails when exc_err_limit is true or
- * ret != 0.
- */
- if (exc_err_limit || ret)
- goto free;
-
- if (con->eeprom_control.num_recs) {
- ret = amdgpu_ras_load_bad_pages(adev);
+ if (init_bp_info) {
+ ret = amdgpu_ras_init_badpage_info(adev);
if (ret)
goto free;
}
+ mutex_init(&con->page_rsv_lock);
+ INIT_KFIFO(con->poison_fifo);
+ mutex_init(&con->page_retirement_lock);
+ init_waitqueue_head(&con->page_retirement_wq);
+ atomic_set(&con->page_retirement_req_cnt, 0);
+ atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
+ con->page_retirement_thread =
+ kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
+ if (IS_ERR(con->page_retirement_thread)) {
+ con->page_retirement_thread = NULL;
+ dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
+ }
+
+ INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement);
+ amdgpu_ras_ecc_log_init(&con->umc_ecc_log);
+#ifdef CONFIG_X86_MCE_AMD
+ if ((adev->asic_type == CHIP_ALDEBARAN) &&
+ (adev->gmc.xgmi.connected_to_cpu))
+ amdgpu_register_bad_pages_mca_notifier(adev);
+#endif
return 0;
free:
@@ -1849,13 +3904,13 @@ free:
kfree(*data);
con->eh_data = NULL;
out:
- dev_warn(adev->dev, "Failed to initialize ras recovery!\n");
+ dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret);
/*
* Except error threshold exceeding case, other failure cases in this
* function would not fail amdgpu driver init.
*/
- if (!exc_err_limit)
+ if (!amdgpu_ras_is_rma(adev))
ret = 0;
else
ret = -EINVAL;
@@ -1867,45 +3922,181 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data = con->eh_data;
+ int max_flush_timeout = MAX_FLUSH_RETIRE_DWORK_TIMES;
+ bool ret;
/* recovery_init failed to init it, fini is useless */
if (!data)
return 0;
+ /* Save all cached bad pages to eeprom */
+ do {
+ flush_delayed_work(&con->page_retirement_dwork);
+ ret = amdgpu_ras_schedule_retirement_dwork(con, 0);
+ } while (ret && max_flush_timeout--);
+
+ if (con->page_retirement_thread)
+ kthread_stop(con->page_retirement_thread);
+
+ atomic_set(&con->page_retirement_req_cnt, 0);
+ atomic_set(&con->poison_creation_count, 0);
+
+ mutex_destroy(&con->page_rsv_lock);
+
cancel_work_sync(&con->recovery_work);
+ cancel_delayed_work_sync(&con->page_retirement_dwork);
+
+ amdgpu_ras_ecc_log_fini(&con->umc_ecc_log);
+
mutex_lock(&con->recovery_lock);
con->eh_data = NULL;
kfree(data->bps);
kfree(data);
mutex_unlock(&con->recovery_lock);
+ amdgpu_ras_critical_region_init(adev);
+#ifdef CONFIG_X86_MCE_AMD
+ amdgpu_unregister_bad_pages_mca_notifier(adev);
+#endif
return 0;
}
/* recovery end */
-/* return 0 if ras will reset gpu and repost.*/
-int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
- unsigned int block)
+static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
{
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ if (amdgpu_sriov_vf(adev)) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ return true;
+ default:
+ return false;
+ }
+ }
- if (!ras)
- return -EINVAL;
+ if (adev->asic_type == CHIP_IP_DISCOVERY) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 3):
+ return true;
+ default:
+ return false;
+ }
+ }
- ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
- return 0;
+ return adev->asic_type == CHIP_VEGA10 ||
+ adev->asic_type == CHIP_VEGA20 ||
+ adev->asic_type == CHIP_ARCTURUS ||
+ adev->asic_type == CHIP_ALDEBARAN ||
+ adev->asic_type == CHIP_SIENNA_CICHLID;
}
-static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
+/*
+ * this is workaround for vega20 workstation sku,
+ * force enable gfx ras, ignore vbios gfx ras flag
+ * due to GC EDC can not write
+ */
+static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
{
- if (adev->asic_type != CHIP_VEGA10 &&
- adev->asic_type != CHIP_VEGA20 &&
- adev->asic_type != CHIP_ARCTURUS &&
- adev->asic_type != CHIP_SIENNA_CICHLID)
- return 1;
- else
- return 0;
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ if (!ctx)
+ return;
+
+ if (strnstr(ctx->vbios_pn, "D16406",
+ sizeof(ctx->vbios_pn)) ||
+ strnstr(ctx->vbios_pn, "D36002",
+ sizeof(ctx->vbios_pn)))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
+}
+
+/* Query ras capablity via atomfirmware interface */
+static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev)
+{
+ /* mem_ecc cap */
+ if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+ dev_info(adev->dev, "MEM ECC is active.\n");
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ } else {
+ dev_info(adev->dev, "MEM ECC is not presented.\n");
+ }
+
+ /* sram_ecc cap */
+ if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+ dev_info(adev->dev, "SRAM ECC is active.\n");
+ if (!amdgpu_sriov_vf(adev))
+ adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ else
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
+ 1 << AMDGPU_RAS_BLOCK__SDMA |
+ 1 << AMDGPU_RAS_BLOCK__GFX);
+
+ /*
+ * VCN/JPEG RAS can be supported on both bare metal and
+ * SRIOV environment
+ */
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(5, 0, 1))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+ else
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+
+ /*
+ * XGMI RAS is not supported if xgmi num physical nodes
+ * is zero
+ */
+ if (!adev->gmc.xgmi.num_physical_nodes)
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
+ } else {
+ dev_info(adev->dev, "SRAM ECC is not presented.\n");
+ }
+}
+
+/* Query poison mode from umc/df IP callbacks */
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ bool df_poison, umc_poison;
+
+ /* poison setting is useless on SRIOV guest */
+ if (amdgpu_sriov_vf(adev) || !con)
+ return;
+
+ /* Init poison supported flag, the default value is false */
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
+ /* enabled by default when GPU is connected to CPU */
+ con->poison_supported = true;
+ } else if (adev->df.funcs &&
+ adev->df.funcs->query_ras_poison_mode &&
+ adev->umc.ras &&
+ adev->umc.ras->query_ras_poison_mode) {
+ df_poison =
+ adev->df.funcs->query_ras_poison_mode(adev);
+ umc_poison =
+ adev->umc.ras->query_ras_poison_mode(adev);
+
+ /* Only poison is set in both DF and UMC, we can support it */
+ if (df_poison && umc_poison)
+ con->poison_supported = true;
+ else if (df_poison != umc_poison)
+ dev_warn(adev->dev,
+ "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+ df_poison, umc_poison);
+ }
}
/*
@@ -1917,36 +4108,147 @@ static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
* we have to initialize ras as normal. but need check if operation is
* allowed or not in each function.
*/
-static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
- uint32_t *hw_supported, uint32_t *supported)
+static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
{
- *hw_supported = 0;
- *supported = 0;
+ adev->ras_hw_enabled = adev->ras_enabled = 0;
- if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
- amdgpu_ras_check_asic_type(adev))
+ if (!amdgpu_ras_asic_supported(adev))
return;
- if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
- dev_info(adev->dev, "HBM ECC is active.\n");
- *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
- 1 << AMDGPU_RAS_BLOCK__DF);
- } else
- dev_info(adev->dev, "HBM ECC is not presented.\n");
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_get_ras_capability(adev))
+ goto init_ras_enabled_flag;
+ }
- if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
- dev_info(adev->dev, "SRAM ECC is active.\n");
- *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
- 1 << AMDGPU_RAS_BLOCK__DF);
- } else
- dev_info(adev->dev, "SRAM ECC is not presented.\n");
+ /* query ras capability from psp */
+ if (amdgpu_psp_get_ras_capability(&adev->psp))
+ goto init_ras_enabled_flag;
+ /* query ras capablity from bios */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ amdgpu_ras_query_ras_capablity_from_vbios(adev);
+ } else {
+ /* driver only manages a few IP blocks RAS feature
+ * when GPU is connected cpu through XGMI */
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
+ 1 << AMDGPU_RAS_BLOCK__SDMA |
+ 1 << AMDGPU_RAS_BLOCK__MMHUB);
+ }
+
+ /* apply asic specific settings (vega20 only for now) */
+ amdgpu_ras_get_quirks(adev);
+
+ /* query poison mode from umc/df ip callback */
+ amdgpu_ras_query_poison_mode(adev);
+
+init_ras_enabled_flag:
/* hw_supported needs to be aligned with RAS block mask. */
- *hw_supported &= AMDGPU_RAS_BLOCK_MASK;
+ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
+
+ adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
+ adev->ras_hw_enabled & amdgpu_ras_mask;
+
+ /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->aca.is_enabled =
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));
+ }
- *supported = amdgpu_ras_enable == 0 ?
- 0 : *hw_supported & amdgpu_ras_mask;
- adev->ras_features = *supported;
+ /* bad page feature is not applicable to specific app platform */
+ if (adev->gmc.is_app_apu &&
+ amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0))
+ amdgpu_bad_page_threshold = 0;
+}
+
+static void amdgpu_ras_counte_dw(struct work_struct *work)
+{
+ struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
+ ras_counte_delay_work.work);
+ struct amdgpu_device *adev = con->adev;
+ struct drm_device *dev = adev_to_drm(adev);
+ unsigned long ce_count, ue_count;
+ int res;
+
+ res = pm_runtime_get_sync(dev->dev);
+ if (res < 0)
+ goto Out;
+
+ /* Cache new values.
+ */
+ if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL) == 0) {
+ atomic_set(&con->ras_ce_count, ce_count);
+ atomic_set(&con->ras_ue_count, ue_count);
+ }
+
+Out:
+ pm_runtime_put_autosuspend(dev->dev);
+}
+
+static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
+{
+ return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 |
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE |
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE |
+ AMDGPU_RAS_ERROR__PARITY;
+}
+
+static void ras_event_mgr_init(struct ras_event_manager *mgr)
+{
+ struct ras_event_state *event_state;
+ int i;
+
+ memset(mgr, 0, sizeof(*mgr));
+ atomic64_set(&mgr->seqno, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) {
+ event_state = &mgr->event_state[i];
+ event_state->last_seqno = RAS_EVENT_INVALID_ID;
+ atomic64_set(&event_state->count, 0);
+ }
+}
+
+static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_hive_info *hive;
+
+ if (!ras)
+ return;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr;
+
+ /* init event manager with node 0 on xgmi system */
+ if (!amdgpu_reset_in_recovery(adev)) {
+ if (!hive || adev->gmc.xgmi.node_id == 0)
+ ras_event_mgr_init(ras->event_mgr);
+ }
+
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+}
+
+static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con || (adev->flags & AMD_IS_APU))
+ return;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT;
+ break;
+ case IP_VERSION(13, 0, 14):
+ con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1);
+ break;
+ default:
+ break;
+ }
}
int amdgpu_ras_init(struct amdgpu_device *adev)
@@ -1957,48 +4259,141 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (con)
return 0;
- con = kmalloc(sizeof(struct amdgpu_ras) +
- sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT,
- GFP_KERNEL|__GFP_ZERO);
+ con = kzalloc(sizeof(*con) +
+ sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
+ sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
+ GFP_KERNEL);
if (!con)
return -ENOMEM;
+ con->adev = adev;
+ INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
+ atomic_set(&con->ras_ce_count, 0);
+ atomic_set(&con->ras_ue_count, 0);
+
con->objs = (struct ras_manager *)(con + 1);
amdgpu_ras_set_context(adev, con);
- amdgpu_ras_check_supported(adev, &con->hw_supported,
- &con->supported);
- if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
+ amdgpu_ras_check_supported(adev);
+
+ if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
+ /* set gfx block ras context feature for VEGA20 Gaming
+ * send ras disable cmd to ras ta during ras late init.
+ */
+ if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
+ con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
+
+ return 0;
+ }
+
r = 0;
goto release_con;
}
+ con->update_channel_flag = false;
con->features = 0;
+ con->schema = 0;
INIT_LIST_HEAD(&con->head);
/* Might need get this flag from vbios. */
con->flags = RAS_DEFAULT_FLAGS;
- if (adev->nbio.funcs->init_ras_controller_interrupt) {
- r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
+ /* initialize nbio ras function ahead of any other
+ * ras functions so hardware fatal error interrupt
+ * can be enabled as early as possible */
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 4, 0):
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->nbio.ras = &nbio_v7_4_ras;
+ break;
+ case IP_VERSION(4, 3, 0):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generation of nbio ras,
+ * nbio v4_3 only support fatal error interrupt
+ * to inform software that DF is freezed due to
+ * system fatal error event. driver should not
+ * enable nbio ras in such case. Instead,
+ * check DF RAS */
+ adev->nbio.ras = &nbio_v4_3_ras;
+ break;
+ case IP_VERSION(6, 3, 1):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generation of nbio ras,
+ * nbif v6_3_1 only support fatal error interrupt
+ * to inform software that DF is freezed due to
+ * system fatal error event. driver should not
+ * enable nbio ras in such case. Instead,
+ * check DF RAS
+ */
+ adev->nbio.ras = &nbif_v6_3_1_ras;
+ break;
+ case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
+ if (!adev->gmc.is_app_apu)
+ adev->nbio.ras = &nbio_v7_9_ras;
+ break;
+ default:
+ /* nbio ras is not available */
+ break;
+ }
+
+ /* nbio ras block needs to be enabled ahead of other ras blocks
+ * to handle fatal error */
+ r = amdgpu_nbio_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_controller_interrupt) {
+ r = adev->nbio.ras->init_ras_controller_interrupt(adev);
if (r)
goto release_con;
}
- if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
- r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_err_event_athub_interrupt) {
+ r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
if (r)
goto release_con;
}
+ /* Packed socket_id to ras feature mask bits[31:29] */
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id)
+ con->features |= ((adev->smuio.funcs->get_socket_id(adev)) <<
+ AMDGPU_RAS_FEATURES_SOCKETID_SHIFT);
+
+ /* Get RAS schema for particular SOC */
+ con->schema = amdgpu_get_ras_schema(adev);
+
+ amdgpu_ras_init_reserved_vram_size(adev);
+
if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL;
goto release_con;
}
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ r = amdgpu_aca_init(adev);
+ else
+ r = amdgpu_mca_init(adev);
+ if (r)
+ goto release_con;
+ }
+
+ con->init_task_pid = task_pid_nr(current);
+ get_task_comm(con->init_task_comm, current);
+
+ mutex_init(&con->critical_region_lock);
+ INIT_LIST_HEAD(&con->critical_region_head);
+
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
- "hardware ability[%x] ras_mask[%x]\n",
- con->hw_supported, con->supported);
+ "hardware ability[%x] ras_mask[%x]\n",
+ adev->ras_hw_enabled, adev->ras_enabled);
+
return 0;
release_con:
amdgpu_ras_set_context(adev, NULL);
@@ -2007,12 +4402,51 @@ release_con:
return r;
}
+int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
+{
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu)
+ return 1;
+ return 0;
+}
+
+static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ struct ras_query_if info = {
+ .head = *ras_block,
+ };
+
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ return 0;
+
+ if (amdgpu_ras_query_error_status(adev, &info) != 0)
+ DRM_WARN("RAS init harvest failure");
+
+ if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0)
+ DRM_WARN("RAS init harvest reset failure");
+
+ return 0;
+}
+
+bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con)
+ return false;
+
+ return con->poison_supported;
+}
+
/* helper function to handle common stuff in ip late init phase */
-int amdgpu_ras_late_init(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_fs_if *fs_info,
- struct ras_ih_if *ih_info)
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
{
+ struct amdgpu_ras_block_object *ras_obj = NULL;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_query_if *query_info;
+ unsigned long ue_count, ce_count;
int r;
/* disable RAS feature per IP block if it is not supported */
@@ -2023,12 +4457,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
if (r) {
- if (r == -EAGAIN) {
- /* request gpu reset. will run again */
- amdgpu_ras_request_reset_on_boot(adev,
- ras_block->block);
- return 0;
- } else if (adev->in_suspend || amdgpu_in_reset(adev)) {
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) {
/* in resume phase, if fail to enable ras,
* clean up all ras fs nodes, and disable ras */
goto cleanup;
@@ -2036,43 +4465,79 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
return r;
}
+ /* check for errors on warm reset edc persisant supported ASIC */
+ amdgpu_persistent_edc_harvesting(adev, ras_block);
+
/* in resume phase, no need to create ras fs node */
- if (adev->in_suspend || amdgpu_in_reset(adev))
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
return 0;
- if (ih_info->cb) {
- r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
+ ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+ if (ras_obj->ras_cb || (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_poison_status ||
+ ras_obj->hw_ops->handle_poison_consumption))) {
+ r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
if (r)
- goto interrupt;
+ goto cleanup;
}
- r = amdgpu_ras_sysfs_create(adev, fs_info);
- if (r)
- goto sysfs;
+ if (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_ras_error_count ||
+ ras_obj->hw_ops->query_ras_error_status)) {
+ r = amdgpu_ras_sysfs_create(adev, ras_block);
+ if (r)
+ goto interrupt;
+
+ /* Those are the cached values at init.
+ */
+ query_info = kzalloc(sizeof(*query_info), GFP_KERNEL);
+ if (!query_info)
+ return -ENOMEM;
+ memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
+
+ if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
+ atomic_set(&con->ras_ce_count, ce_count);
+ atomic_set(&con->ras_ue_count, ue_count);
+ }
+
+ kfree(query_info);
+ }
return 0;
-cleanup:
- amdgpu_ras_sysfs_remove(adev, ras_block);
-sysfs:
- if (ih_info->cb)
- amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+
interrupt:
+ if (ras_obj->ras_cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ras_block);
+cleanup:
amdgpu_ras_feature_enable(adev, ras_block, 0);
return r;
}
+static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ return amdgpu_ras_block_late_init(adev, ras_block);
+}
+
/* helper function to remove ras fs node and interrupt handler */
-void amdgpu_ras_late_fini(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_ih_if *ih_info)
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
{
- if (!ras_block || !ih_info)
+ struct amdgpu_ras_block_object *ras_obj;
+ if (!ras_block)
return;
amdgpu_ras_sysfs_remove(adev, ras_block);
- if (ih_info->cb)
- amdgpu_ras_interrupt_remove_handler(adev, ih_info);
- amdgpu_ras_feature_enable(adev, ras_block, 0);
+
+ ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+ if (ras_obj->ras_cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ras_block);
+}
+
+static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ return amdgpu_ras_block_late_fini(adev, ras_block);
}
/* do some init work after IP late init as dependence.
@@ -2083,8 +4548,12 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj, *tmp;
- if (!con)
+ if (!adev->ras_enabled || !con) {
+ /* clean ras context for VEGA20 Gaming after send ras disable cmd */
+ amdgpu_release_ras_context(adev);
+
return;
+ }
if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
/* Set up all other IPs which are not implemented. There is a
@@ -2106,62 +4575,134 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
}
}
}
-
- if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
- con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
- /* setup ras obj state as disabled.
- * for init_by_vbios case.
- * if we want to enable ras, just enable it in a normal way.
- * If we want do disable it, need setup ras obj as enabled,
- * then issue another TA disable cmd.
- * See feature_enable_on_boot
- */
- amdgpu_ras_disable_all_features(adev, 1);
- amdgpu_ras_reset_gpu(adev);
- }
}
void amdgpu_ras_suspend(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!con)
+ if (!adev->ras_enabled || !con)
return;
amdgpu_ras_disable_all_features(adev, 0);
/* Make sure all ras objects are disabled. */
- if (con->features)
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
amdgpu_ras_disable_all_features(adev, 1);
}
+int amdgpu_ras_late_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_block_list *node, *tmp;
+ struct amdgpu_ras_block_object *obj;
+ int r;
+
+ amdgpu_ras_event_mgr_init(adev);
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_reset_in_recovery(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ r = amdgpu_aca_reset(adev);
+ else
+ r = amdgpu_mca_reset(adev);
+ if (r)
+ return r;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_ras_set_aca_debug_mode(adev, false);
+ else
+ amdgpu_ras_set_mca_debug_mode(adev, false);
+ }
+ }
+
+ /* Guest side doesn't need init ras feature */
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_telemetry_en(adev))
+ return 0;
+
+ list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+ obj = node->ras_obj;
+ if (!obj) {
+ dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+ continue;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block))
+ continue;
+
+ if (obj->ras_late_init) {
+ r = obj->ras_late_init(adev, &obj->ras_comm);
+ if (r) {
+ dev_err(adev->dev, "%s failed to execute ras_late_init! ret:%d\n",
+ obj->ras_comm.name, r);
+ return r;
+ }
+ } else
+ amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
+ }
+
+ return 0;
+}
+
/* do some fini work before IP fini as dependence */
int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!con)
+ if (!adev->ras_enabled || !con)
return 0;
+
/* Need disable ras on all IPs here before ip [hw/sw]fini */
- amdgpu_ras_disable_all_features(adev, 0);
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
+ amdgpu_ras_disable_all_features(adev, 0);
amdgpu_ras_recovery_fini(adev);
return 0;
}
int amdgpu_ras_fini(struct amdgpu_device *adev)
{
+ struct amdgpu_ras_block_list *ras_node, *tmp;
+ struct amdgpu_ras_block_object *obj = NULL;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!con)
+ if (!adev->ras_enabled || !con)
return 0;
+ amdgpu_ras_critical_region_fini(adev);
+ mutex_destroy(&con->critical_region_lock);
+
+ list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
+ if (ras_node->ras_obj) {
+ obj = ras_node->ras_obj;
+ if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
+ obj->ras_fini)
+ obj->ras_fini(adev, &obj->ras_comm);
+ else
+ amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
+ }
+
+ /* Clear ras blocks from ras_list and free ras block list node */
+ list_del(&ras_node->node);
+ kfree(ras_node);
+ }
+
amdgpu_ras_fs_fini(adev);
amdgpu_ras_interrupt_remove_all(adev);
- WARN(con->features, "Feature mask is not cleared");
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_fini(adev);
+ else
+ amdgpu_mca_fini(adev);
+ }
- if (con->features)
- amdgpu_ras_disable_all_features(adev, 1);
+ WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
+
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
+ amdgpu_ras_disable_all_features(adev, 0);
+
+ cancel_delayed_work_sync(&con->ras_counte_delay_work);
amdgpu_ras_set_context(adev, NULL);
kfree(con);
@@ -2169,20 +4710,161 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
return 0;
}
-void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
{
- uint32_t hw_supported, supported;
+ struct amdgpu_ras *ras;
- amdgpu_ras_check_supported(adev, &hw_supported, &supported);
- if (!hw_supported)
- return;
+ ras = amdgpu_ras_get_context(adev);
+ if (!ras)
+ return false;
+
+ return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+}
+
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (status)
+ set_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ else
+ clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ }
+}
+
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ ras->ras_err_state = 0;
+ ras->gpu_reset_flags = 0;
+ }
+}
+
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras)
+ set_bit(block, &ras->ras_err_state);
+}
+
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (block == AMDGPU_RAS_BLOCK__ANY)
+ return (ras->ras_err_state != 0);
+ else
+ return test_bit(block, &ras->ras_err_state) ||
+ test_bit(AMDGPU_RAS_BLOCK__LAST,
+ &ras->ras_err_state);
+ }
+
+ return false;
+}
+
+static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (!ras)
+ return NULL;
+
+ return ras->event_mgr;
+}
+
+int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
+ const void *caller)
+{
+ struct ras_event_manager *event_mgr;
+ struct ras_event_state *event_state;
+ int ret = 0;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
+ if (type >= RAS_EVENT_TYPE_COUNT) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ event_mgr = __get_ras_event_mgr(adev);
+ if (!event_mgr) {
+ ret = -EINVAL;
+ goto out;
+ }
+ event_state = &event_mgr->event_state[type];
+ event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno);
+ atomic64_inc(&event_state->count);
+
+out:
+ if (ret && caller)
+ dev_warn(adev->dev, "failed mark ras event (%d) in %ps, ret:%d\n",
+ (int)type, caller, ret);
+
+ return ret;
+}
+
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type)
+{
+ struct ras_event_manager *event_mgr;
+ u64 id;
+
+ if (type >= RAS_EVENT_TYPE_COUNT)
+ return RAS_EVENT_INVALID_ID;
+
+ switch (type) {
+ case RAS_EVENT_TYPE_FATAL:
+ case RAS_EVENT_TYPE_POISON_CREATION:
+ case RAS_EVENT_TYPE_POISON_CONSUMPTION:
+ event_mgr = __get_ras_event_mgr(adev);
+ if (!event_mgr)
+ return RAS_EVENT_INVALID_ID;
+
+ id = event_mgr->event_state[type].last_seqno;
+ break;
+ case RAS_EVENT_TYPE_INVALID:
+ default:
+ id = RAS_EVENT_INVALID_ID;
+ break;
+ }
+
+ return id;
+}
+
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+{
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
- dev_info(adev->dev, "uncorrectable hardware error"
- "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_FATAL;
+ u64 event_id = RAS_EVENT_INVALID_ID;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
+ if (!amdgpu_ras_mark_ras_event(adev, type))
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error"
+ "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+
+ amdgpu_ras_set_fed(adev, true);
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
amdgpu_ras_reset_gpu(adev);
}
+
+ return -EBUSY;
}
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
@@ -2196,18 +4878,871 @@ bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
return false;
}
-bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev)
+void amdgpu_release_ras_context(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- bool exc_err_limit = false;
- if (con && (amdgpu_bad_page_threshold != 0))
- amdgpu_ras_eeprom_check_err_threshold(&con->eeprom_control,
- &exc_err_limit);
+ if (!con)
+ return;
+
+ if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
+ con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+ }
+}
+
+#ifdef CONFIG_X86_MCE_AMD
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+ int i;
+ struct amdgpu_device *adev = NULL;
+
+ for (i = 0; i < mce_adev_list.num_gpu; i++) {
+ adev = mce_adev_list.devs[i];
+
+ if (adev && adev->gmc.xgmi.connected_to_cpu &&
+ adev->gmc.xgmi.physical_node_id == node_id)
+ break;
+ adev = NULL;
+ }
+
+ return adev;
+}
+
+#define GET_MCA_IPID_GPUID(m) (((m) >> 44) & 0xF)
+#define GET_UMC_INST(m) (((m) >> 21) & 0x7)
+#define GET_CHAN_INDEX(m) ((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
+#define GPU_ID_OFFSET 8
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct mce *m = (struct mce *)data;
+ struct amdgpu_device *adev = NULL;
+ uint32_t gpu_id = 0;
+ uint32_t umc_inst = 0, ch_inst = 0;
+
+ /*
+ * If the error was generated in UMC_V2, which belongs to GPU UMCs,
+ * and error occurred in DramECC (Extended error code = 0) then only
+ * process the error, else bail out.
+ */
+ if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
+ (XEC(m->status, 0x3f) == 0x0)))
+ return NOTIFY_DONE;
+
+ /*
+ * If it is correctable error, return.
+ */
+ if (mce_is_correctable(m))
+ return NOTIFY_OK;
+
+ /*
+ * GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
+ */
+ gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
+
+ adev = find_adev(gpu_id);
+ if (!adev) {
+ DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
+ gpu_id);
+ return NOTIFY_DONE;
+ }
+
+ /*
+ * If it is uncorrectable error, then find out UMC instance and
+ * channel index.
+ */
+ umc_inst = GET_UMC_INST(m->ipid);
+ ch_inst = GET_CHAN_INDEX(m->ipid);
+
+ dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
+ umc_inst, ch_inst);
+
+ if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
+ return NOTIFY_OK;
+ else
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block amdgpu_bad_page_nb = {
+ .notifier_call = amdgpu_bad_page_notifier,
+ .priority = MCE_PRIO_UC,
+};
+
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+ /*
+ * Add the adev to the mce_adev_list.
+ * During mode2 reset, amdgpu device is temporarily
+ * removed from the mgpu_info list which can cause
+ * page retirement to fail.
+ * Use this list instead of mgpu_info to find the amdgpu
+ * device on which the UMC error was reported.
+ */
+ mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;
/*
- * We are only interested in variable exc_err_limit,
- * as it says if GPU is in bad state or not.
+ * Register the x86 notifier only once
+ * with MCE subsystem.
+ */
+ if (notifier_registered == false) {
+ mce_register_decode_chain(&amdgpu_bad_page_nb);
+ notifier_registered = true;
+ }
+}
+static void amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ if (!notifier_registered && !mce_adev_list.num_gpu)
+ return;
+ for (i = 0, j = 0; i < mce_adev_list.num_gpu; i++) {
+ if (mce_adev_list.devs[i] == adev)
+ mce_adev_list.devs[i] = NULL;
+ if (!mce_adev_list.devs[i])
+ ++j;
+ }
+
+ if (j == mce_adev_list.num_gpu) {
+ mce_adev_list.num_gpu = 0;
+ /* Unregister x86 notifier with MCE subsystem. */
+ if (notifier_registered) {
+ mce_unregister_decode_chain(&amdgpu_bad_page_nb);
+ notifier_registered = false;
+ }
+ }
+}
+#endif
+
+struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
+{
+ if (!adev)
+ return NULL;
+
+ return adev->psp.ras_context.ras;
+}
+
+int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con)
+{
+ if (!adev)
+ return -EINVAL;
+
+ adev->psp.ras_context.ras = ras_con;
+ return 0;
+}
+
+/* check if ras is supported on block, say, sdma, gfx */
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,
+ unsigned int block)
+{
+ int ret = 0;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (block >= AMDGPU_RAS_BLOCK_COUNT)
+ return 0;
+
+ ret = ras && (adev->ras_enabled & (1 << block));
+
+ /* For the special asic with mem ecc enabled but sram ecc
+ * not enabled, even if the ras block is not supported on
+ * .ras_enabled, if the asic supports poison mode and the
+ * ras block has ras configuration, it can be considered
+ * that the ras block supports ras function.
*/
- return exc_err_limit;
+ if (!ret &&
+ (block == AMDGPU_RAS_BLOCK__GFX ||
+ block == AMDGPU_RAS_BLOCK__SDMA ||
+ block == AMDGPU_RAS_BLOCK__VCN ||
+ block == AMDGPU_RAS_BLOCK__JPEG) &&
+ (amdgpu_ras_mask & (1 << block)) &&
+ amdgpu_ras_is_poison_mode_supported(adev) &&
+ amdgpu_ras_get_ras_block(adev, block, 0))
+ ret = 1;
+
+ return ret;
+}
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ /* mode1 is the only selection for RMA status */
+ if (amdgpu_ras_is_rma(adev)) {
+ ras->gpu_reset_flags = 0;
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ }
+
+ if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ int hive_ras_recovery = 0;
+
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+ /* In the case of multiple GPUs, after a GPU has started
+ * resetting all GPUs on hive, other GPUs do not need to
+ * trigger GPU reset again.
+ */
+ if (!hive_ras_recovery)
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ else
+ atomic_set(&ras->in_recovery, 0);
+ } else {
+ flush_work(&ras->recovery_work);
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (con) {
+ ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+ if (!ret)
+ con->is_aca_debug_mode = enable;
+ }
+
+ return ret;
+}
+
+int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (con) {
+ if (amdgpu_aca_is_enabled(adev))
+ ret = amdgpu_aca_smu_set_debug_mode(adev, enable);
+ else
+ ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+ if (!ret)
+ con->is_aca_debug_mode = enable;
+ }
+
+ return ret;
+}
+
+bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!con)
+ return false;
+
+ if ((amdgpu_aca_is_enabled(adev) && smu_funcs && smu_funcs->set_debug_mode) ||
+ (!amdgpu_aca_is_enabled(adev) && mca_funcs && mca_funcs->mca_set_debug_mode))
+ return con->is_aca_debug_mode;
+ else
+ return true;
+}
+
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+ unsigned int *error_query_mode)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+
+ if (!con) {
+ *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY;
+ return false;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ *error_query_mode = AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY;
+ } else if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) {
+ *error_query_mode =
+ (con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
+ } else {
+ *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
+ }
+
+ return true;
+}
+
+/* Register each ip ras block into amdgpu ras */
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+ struct amdgpu_ras_block_object *ras_block_obj)
+{
+ struct amdgpu_ras_block_list *ras_node;
+ if (!adev || !ras_block_obj)
+ return -EINVAL;
+
+ ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);
+ if (!ras_node)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&ras_node->node);
+ ras_node->ras_obj = ras_block_obj;
+ list_add_tail(&ras_node->node, &adev->ras_list);
+
+ return 0;
+}
+
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
+{
+ if (!err_type_name)
+ return;
+
+ switch (err_type) {
+ case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
+ sprintf(err_type_name, "correctable");
+ break;
+ case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
+ sprintf(err_type_name, "uncorrectable");
+ break;
+ default:
+ sprintf(err_type_name, "unknown");
+ break;
+ }
+}
+
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id)
+{
+ uint32_t err_status_lo_data, err_status_lo_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_lo, reg_entry->reg_lo);
+ err_status_lo_data = RREG32(err_status_lo_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
+ !REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
+ return false;
+
+ *memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
+
+ return true;
+}
+
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt)
+{
+ uint32_t err_status_hi_data, err_status_hi_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_hi, reg_entry->reg_hi);
+ err_status_hi_data = RREG32(err_status_hi_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
+ !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
+ /* keep the check here in case we need to refer to the result later */
+ dev_dbg(adev->dev, "Invalid err_info field\n");
+
+ /* read err count */
+ *err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
+
+ return true;
+}
+
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count)
+{
+ uint32_t memory_id;
+ unsigned long err_cnt;
+ char err_type_name[16];
+ uint32_t i, j;
+
+ for (i = 0; i < reg_list_size; i++) {
+ /* query memory_id from err_status_lo */
+ if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
+ instance, &memory_id))
+ continue;
+
+ /* query err_cnt from err_status_hi */
+ if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
+ instance, &err_cnt) ||
+ !err_cnt)
+ continue;
+
+ *err_count += err_cnt;
+
+ /* log the errors */
+ amdgpu_ras_get_error_type_name(err_type, err_type_name);
+ if (!mem_list) {
+ /* memory_list is not supported */
+ dev_info(adev->dev,
+ "%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, memory_id);
+ } else {
+ for (j = 0; j < mem_list_size; j++) {
+ if (memory_id == mem_list[j].memory_id) {
+ dev_info(adev->dev,
+ "%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, mem_list[j].name);
+ break;
+ }
+ }
+ }
+ }
+}
+
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance)
+{
+ uint32_t err_status_lo_offset, err_status_hi_offset;
+ uint32_t i;
+
+ for (i = 0; i < reg_list_size; i++) {
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_lo, reg_list[i].reg_lo);
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_hi, reg_list[i].reg_hi);
+ WREG32(err_status_lo_offset, 0);
+ WREG32(err_status_hi_offset, 0);
+ }
+}
+
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
+{
+ memset(err_data, 0, sizeof(*err_data));
+
+ INIT_LIST_HEAD(&err_data->err_node_list);
+
+ return 0;
+}
+
+static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
+{
+ if (!err_node)
+ return;
+
+ list_del(&err_node->node);
+ kvfree(err_node);
+}
+
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
+{
+ struct ras_err_node *err_node, *tmp;
+
+ list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
+ amdgpu_ras_error_node_release(err_node);
+}
+
+static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info)
+{
+ struct ras_err_node *err_node;
+ struct amdgpu_smuio_mcm_config_info *ref_id;
+
+ if (!err_data || !mcm_info)
+ return NULL;
+
+ for_each_ras_error(err_node, err_data) {
+ ref_id = &err_node->err_info.mcm_info;
+
+ if (mcm_info->socket_id == ref_id->socket_id &&
+ mcm_info->die_id == ref_id->die_id)
+ return err_node;
+ }
+
+ return NULL;
+}
+
+static struct ras_err_node *amdgpu_ras_error_node_new(void)
+{
+ struct ras_err_node *err_node;
+
+ err_node = kvzalloc(sizeof(*err_node), GFP_KERNEL);
+ if (!err_node)
+ return NULL;
+
+ INIT_LIST_HEAD(&err_node->node);
+
+ return err_node;
+}
+
+static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
+{
+ struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
+ struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
+ struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
+ struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
+
+ if (unlikely(infoa->socket_id != infob->socket_id))
+ return infoa->socket_id - infob->socket_id;
+ else
+ return infoa->die_id - infob->die_id;
+
+ return 0;
+}
+
+static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info)
+{
+ struct ras_err_node *err_node;
+
+ err_node = amdgpu_ras_error_find_node_by_id(err_data, mcm_info);
+ if (err_node)
+ return &err_node->err_info;
+
+ err_node = amdgpu_ras_error_node_new();
+ if (!err_node)
+ return NULL;
+
+ memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
+
+ err_data->err_list_count++;
+ list_add_tail(&err_node->node, &err_data->err_node_list);
+ list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
+
+ return &err_node->err_info;
+}
+
+int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count)
+{
+ struct ras_err_info *err_info;
+
+ if (!err_data || !mcm_info)
+ return -EINVAL;
+
+ if (!count)
+ return 0;
+
+ err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+ if (!err_info)
+ return -EINVAL;
+
+ err_info->ue_count += count;
+ err_data->ue_count += count;
+
+ return 0;
+}
+
+int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count)
+{
+ struct ras_err_info *err_info;
+
+ if (!err_data || !mcm_info)
+ return -EINVAL;
+
+ if (!count)
+ return 0;
+
+ err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+ if (!err_info)
+ return -EINVAL;
+
+ err_info->ce_count += count;
+ err_data->ce_count += count;
+
+ return 0;
+}
+
+int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count)
+{
+ struct ras_err_info *err_info;
+
+ if (!err_data || !mcm_info)
+ return -EINVAL;
+
+ if (!count)
+ return 0;
+
+ err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+ if (!err_info)
+ return -EINVAL;
+
+ err_info->de_count += count;
+ err_data->de_count += count;
+
+ return 0;
+}
+
+#define mmMP0_SMN_C2PMSG_92 0x1609C
+#define mmMP0_SMN_C2PMSG_126 0x160BE
+static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
+ u32 instance)
+{
+ u32 socket_id, aid_id, hbm_id;
+ u32 fw_status;
+ u32 boot_error;
+ u64 reg_addr;
+
+ /* The pattern for smn addressing in other SOC could be different from
+ * the one for aqua_vanjaram. We should revisit the code if the pattern
+ * is changed. In such case, replace the aqua_vanjaram implementation
+ * with more common helper */
+ reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+ aqua_vanjaram_encode_ext_smn_addressing(instance);
+ fw_status = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+
+ reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
+ aqua_vanjaram_encode_ext_smn_addressing(instance);
+ boot_error = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+
+ socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
+ aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
+ hbm_id = ((1 == AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error)) ? 0 : 1);
+
+ if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, memory training failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, firmware load failed at boot time\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, wafl link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, xgmi link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, usr cp link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, usr dp link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm memory test failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_GENERIC(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, Boot Controller Generic Error\n",
+ socket_id, aid_id, fw_status);
+}
+
+static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
+ u32 instance)
+{
+ u64 reg_addr;
+ u32 reg_data;
+ int retry_loop;
+
+ reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+ aqua_vanjaram_encode_ext_smn_addressing(instance);
+
+ for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
+ reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+ if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS)
+ return false;
+ else
+ msleep(1);
+ }
+
+ return true;
+}
+
+void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances)
+{
+ u32 i;
+
+ for (i = 0; i < num_instances; i++) {
+ if (amdgpu_ras_boot_error_detected(adev, i))
+ amdgpu_ras_boot_time_error_reporting(adev, i);
+ }
+}
+
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT;
+ int ret = 0;
+
+ if (amdgpu_ras_check_critical_address(adev, start))
+ return 0;
+
+ mutex_lock(&con->page_rsv_lock);
+ ret = amdgpu_vram_mgr_query_page_status(mgr, start);
+ if (ret == -ENOENT)
+ ret = amdgpu_vram_mgr_reserve_range(mgr, start, AMDGPU_GPU_PAGE_SIZE);
+ mutex_unlock(&con->page_rsv_lock);
+
+ return ret;
+}
+
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ if (RAS_EVENT_ID_IS_VALID(event_id))
+ dev_printk(KERN_INFO, adev->dev, "{%llu}%pV", event_id, &vaf);
+ else
+ dev_printk(KERN_INFO, adev->dev, "%pV", &vaf);
+
+ va_end(args);
+}
+
+bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_is_rma(adev);
+
+ if (!con)
+ return false;
+
+ return con->is_rma;
+}
+
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_mgr_resource *vres;
+ struct ras_critical_region *region;
+ struct drm_buddy_block *block;
+ int ret = 0;
+
+ if (!bo || !bo->tbo.resource)
+ return -EINVAL;
+
+ vres = to_amdgpu_vram_mgr_resource(bo->tbo.resource);
+
+ mutex_lock(&con->critical_region_lock);
+
+ /* Check if the bo had been recorded */
+ list_for_each_entry(region, &con->critical_region_head, node)
+ if (region->bo == bo)
+ goto out;
+
+ /* Record new critical amdgpu bo */
+ list_for_each_entry(block, &vres->blocks, link) {
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ region->bo = bo;
+ region->start = amdgpu_vram_mgr_block_start(block);
+ region->size = amdgpu_vram_mgr_block_size(block);
+ list_add_tail(&region->node, &con->critical_region_head);
+ }
+
+out:
+ mutex_unlock(&con->critical_region_lock);
+
+ return ret;
+}
+
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev)
+{
+ amdgpu_ras_add_critical_region(adev, adev->mman.fw_reserved_memory);
+}
+
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_critical_region *region, *tmp;
+
+ mutex_lock(&con->critical_region_lock);
+ list_for_each_entry_safe(region, tmp, &con->critical_region_head, node) {
+ list_del(&region->node);
+ kfree(region);
+ }
+ mutex_unlock(&con->critical_region_lock);
+}
+
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_critical_region *region;
+ bool ret = false;
+
+ mutex_lock(&con->critical_region_lock);
+ list_for_each_entry(region, &con->critical_region_head, node) {
+ if ((region->start <= addr) &&
+ (addr < (region->start + region->size))) {
+ ret = true;
+ break;
+ }
+ }
+ mutex_unlock(&con->critical_region_lock);
+
+ return ret;
+}
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (amdgpu_uniras_enabled(tmp_adev))
+ amdgpu_ras_mgr_pre_reset(tmp_adev);
+ }
+}
+
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (amdgpu_uniras_enabled(tmp_adev))
+ amdgpu_ras_mgr_post_reset(tmp_adev);
+ }
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 762f5e46c007..ff44190d7d98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -26,13 +26,57 @@
#include <linux/debugfs.h>
#include <linux/list.h>
-#include "amdgpu.h"
-#include "amdgpu_psp.h"
+#include <linux/kfifo.h>
+#include <linux/radix-tree.h>
#include "ta_ras_if.h"
#include "amdgpu_ras_eeprom.h"
+#include "amdgpu_smuio.h"
+#include "amdgpu_aca.h"
+
+struct amdgpu_iv_entry;
+
+#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0)
+#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1)
+#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2)
+#define AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 3, 3)
+#define AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 4, 4)
+#define AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 5, 5)
+#define AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(x) AMDGPU_GET_REG_FIELD(x, 6, 6)
+#define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7)
+#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8)
+#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11)
+#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13)
+#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29)
+#define AMDGPU_RAS_GPU_ERR_GENERIC(x) AMDGPU_GET_REG_FIELD(x, 30, 30)
+
+#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100
+#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
+#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
-#define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1)
+/* position of instance value in sub_block_index of
+ * ta_ras_trigger_error_input, the sub block uses lower 12 bits
+ */
+#define AMDGPU_RAS_INST_MASK 0xfffff000
+#define AMDGPU_RAS_INST_SHIFT 0xc
+
+#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
+#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
+
+/* Reserve 8 physical dram row for possible retirement.
+ * In worst cases, it will lose 8 * 2MB memory in vram domain */
+#define AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20)
+/* The high three bits indicates socketid */
+#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
+
+#define RAS_EVENT_INVALID_ID (BIT_ULL(63))
+#define RAS_EVENT_ID_IS_VALID(x) (!((x) & BIT_ULL(63)))
+
+#define RAS_EVENT_LOG(adev, id, fmt, ...) \
+ amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__)
+
+#define amdgpu_ras_mark_ras_event(adev, type) \
+ (amdgpu_ras_mark_ras_event_caller((adev), (type), __builtin_return_address(0)))
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
@@ -49,11 +93,28 @@ enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__MP0,
AMDGPU_RAS_BLOCK__MP1,
AMDGPU_RAS_BLOCK__FUSE,
+ AMDGPU_RAS_BLOCK__MCA,
+ AMDGPU_RAS_BLOCK__VCN,
+ AMDGPU_RAS_BLOCK__JPEG,
+ AMDGPU_RAS_BLOCK__IH,
+ AMDGPU_RAS_BLOCK__MPIO,
+ AMDGPU_RAS_BLOCK__MMSCH,
+
+ AMDGPU_RAS_BLOCK__LAST,
+ AMDGPU_RAS_BLOCK__ANY = -1
+};
+
+enum amdgpu_ras_mca_block {
+ AMDGPU_RAS_MCA_BLOCK__MP0 = 0,
+ AMDGPU_RAS_MCA_BLOCK__MP1,
+ AMDGPU_RAS_MCA_BLOCK__MPIO,
+ AMDGPU_RAS_MCA_BLOCK__IOHC,
- AMDGPU_RAS_BLOCK__LAST
+ AMDGPU_RAS_MCA_BLOCK__LAST
};
#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
+#define AMDGPU_RAS_MCA_BLOCK_COUNT AMDGPU_RAS_MCA_BLOCK__LAST
#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
enum amdgpu_ras_gfx_subblock {
@@ -302,33 +363,193 @@ enum amdgpu_ras_ret {
AMDGPU_RAS_PT,
};
+enum amdgpu_ras_error_query_mode {
+ AMDGPU_RAS_INVALID_ERROR_QUERY = 0,
+ AMDGPU_RAS_DIRECT_ERROR_QUERY = 1,
+ AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2,
+ AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY = 3,
+};
+
+/* ras error status reisger fields */
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L
+#define ERR_STATUS_LO__MEMORY_ID__SHIFT 0x18
+#define ERR_STATUS_LO__MEMORY_ID_MASK 0xFF000000L
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG__SHIFT 0x2
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG_MASK 0x00000004L
+#define ERR_STATUS__ERR_CNT__SHIFT 0x17
+#define ERR_STATUS__ERR_CNT_MASK 0x03800000L
+
+#define AMDGPU_RAS_REG_ENTRY(ip, inst, reg_lo, reg_hi) \
+ ip##_HWIP, inst, reg_lo##_BASE_IDX, reg_lo, reg_hi##_BASE_IDX, reg_hi
+
+#define AMDGPU_RAS_REG_ENTRY_OFFSET(hwip, ip_inst, segment, reg) \
+ (adev->reg_offset[hwip][ip_inst][segment] + (reg))
+
+#define AMDGPU_RAS_ERR_INFO_VALID (1 << 0)
+#define AMDGPU_RAS_ERR_STATUS_VALID (1 << 1)
+#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
+
+#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
+#define AMDGPU_RAS_GPU_RESET_MODE1_RESET (0x1 << 1)
+
+struct amdgpu_ras_err_status_reg_entry {
+ uint32_t hwip;
+ uint32_t ip_inst;
+ uint32_t seg_lo;
+ uint32_t reg_lo;
+ uint32_t seg_hi;
+ uint32_t reg_hi;
+ uint32_t reg_inst;
+ uint32_t flags;
+ const char *block_name;
+};
+
+struct amdgpu_ras_memory_id_entry {
+ uint32_t memory_id;
+ const char *name;
+};
+
struct ras_common_if {
enum amdgpu_ras_block block;
enum amdgpu_ras_error_type type;
uint32_t sub_block_index;
- /* block name */
char name[32];
};
+#define MAX_UMC_CHANNEL_NUM 32
+
+struct ecc_info_per_ch {
+ uint16_t ce_count_lo_chip;
+ uint16_t ce_count_hi_chip;
+ uint64_t mca_umc_status;
+ uint64_t mca_umc_addr;
+ uint64_t mca_ceumc_addr;
+};
+
+struct umc_ecc_info {
+ struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];
+
+ /* Determine smu ecctable whether support
+ * record correctable error address
+ */
+ int record_ce_addr_supported;
+};
+
+enum ras_event_type {
+ RAS_EVENT_TYPE_INVALID = 0,
+ RAS_EVENT_TYPE_FATAL,
+ RAS_EVENT_TYPE_POISON_CREATION,
+ RAS_EVENT_TYPE_POISON_CONSUMPTION,
+ RAS_EVENT_TYPE_COUNT,
+};
+
+struct ras_event_state {
+ u64 last_seqno;
+ atomic64_t count;
+};
+
+struct ras_event_manager {
+ atomic64_t seqno;
+ struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT];
+};
+
+struct ras_event_id {
+ enum ras_event_type type;
+ u64 event_id;
+};
+
+struct ras_query_context {
+ struct ras_event_id evid;
+};
+
+typedef int (*pasid_notify)(struct amdgpu_device *adev,
+ uint16_t pasid, void *data);
+
+struct ras_poison_msg {
+ enum amdgpu_ras_block block;
+ uint16_t pasid;
+ uint32_t reset;
+ pasid_notify pasid_fn;
+ void *data;
+};
+
+struct ras_err_pages {
+ uint32_t count;
+ uint64_t *pfn;
+};
+
+struct ras_ecc_err {
+ uint64_t status;
+ uint64_t ipid;
+ uint64_t addr;
+ uint64_t pa_pfn;
+ /* save global channel index across all UMC instances */
+ uint32_t channel_idx;
+ struct ras_err_pages err_pages;
+};
+
+struct ras_ecc_log_info {
+ struct mutex lock;
+ struct radix_tree_root de_page_tree;
+ uint64_t de_queried_count;
+ uint64_t consumption_q_count;
+};
+
+struct ras_critical_region {
+ struct list_head node;
+ struct amdgpu_bo *bo;
+ uint64_t start;
+ uint64_t size;
+};
+
+struct ras_eeprom_table_version {
+ uint32_t minor : 16;
+ uint32_t major : 16;
+};
+
+struct ras_eeprom_smu_funcs {
+ int (*get_ras_table_version)(struct amdgpu_device *adev,
+ uint32_t *table_version);
+ int (*get_badpage_count)(struct amdgpu_device *adev, uint32_t *count, uint32_t timeout);
+ int (*get_badpage_mca_addr)(struct amdgpu_device *adev, uint16_t index, uint64_t *mca_addr);
+ int (*set_timestamp)(struct amdgpu_device *adev, uint64_t timestamp);
+ int (*get_timestamp)(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+ int (*get_badpage_ipid)(struct amdgpu_device *adev, uint16_t index, uint64_t *ipid);
+ int (*erase_ras_table)(struct amdgpu_device *adev, uint32_t *result);
+};
+
+enum ras_smu_feature_flags {
+ RAS_SMU_FEATURE_BIT__RAS_EEPROM = BIT_ULL(0),
+};
+
+struct ras_smu_drv {
+ const struct ras_eeprom_smu_funcs *smu_eeprom_funcs;
+ void (*ras_smu_feature_flags)(struct amdgpu_device *adev, uint64_t *flags);
+};
+
struct amdgpu_ras {
+ void *ras_mgr;
/* ras infrastructure */
/* for ras itself. */
- uint32_t hw_supported;
- /* for IP to check its ras ability. */
- uint32_t supported;
uint32_t features;
+ uint32_t schema;
struct list_head head;
- /* debugfs */
- struct dentry *dir;
/* sysfs */
struct device_attribute features_attr;
+ struct device_attribute version_attr;
+ struct device_attribute schema_attr;
+ struct device_attribute event_state_attr;
struct bin_attribute badpages_attr;
+ struct dentry *de_ras_eeprom_table;
/* block array */
struct ras_manager *objs;
/* gpu recovery */
struct work_struct recovery_work;
atomic_t in_recovery;
+ atomic_t rma_in_recovery;
struct amdgpu_device *adev;
/* error handler data */
struct ras_err_handler_data *eh_data;
@@ -345,25 +566,100 @@ struct amdgpu_ras {
/* disable ras error count harvest in recovery */
bool disable_ras_err_cnt_harvest;
+
+ /* is poison mode supported */
+ bool poison_supported;
+
+ /* RAS count errors delayed work */
+ struct delayed_work ras_counte_delay_work;
+ atomic_t ras_ue_count;
+ atomic_t ras_ce_count;
+
+ /* record umc error info queried from smu */
+ struct umc_ecc_info umc_ecc;
+
+ /* Indicates smu whether need update bad channel info */
+ bool update_channel_flag;
+ /* Record status of smu mca debug mode */
+ bool is_aca_debug_mode;
+ bool is_rma;
+
+ /* Record special requirements of gpu reset caller */
+ uint32_t gpu_reset_flags;
+
+ struct task_struct *page_retirement_thread;
+ wait_queue_head_t page_retirement_wq;
+ struct mutex page_retirement_lock;
+ atomic_t page_retirement_req_cnt;
+ atomic_t poison_creation_count;
+ atomic_t poison_consumption_count;
+ struct mutex page_rsv_lock;
+ DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128);
+ struct ras_ecc_log_info umc_ecc_log;
+ struct delayed_work page_retirement_dwork;
+
+ /* ras errors detected */
+ unsigned long ras_err_state;
+
+ /* RAS event manager */
+ struct ras_event_manager __event_mgr;
+ struct ras_event_manager *event_mgr;
+
+ uint64_t reserved_pages_in_bytes;
+
+ pid_t init_task_pid;
+ char init_task_comm[TASK_COMM_LEN];
+
+ int bad_page_num;
+
+ struct list_head critical_region_head;
+ struct mutex critical_region_lock;
+
+ /* Protect poison injection */
+ struct mutex poison_lock;
+
+ /* Disable/Enable uniras switch */
+ bool uniras_enabled;
+ const struct ras_smu_drv *ras_smu_drv;
};
struct ras_fs_data {
- char sysfs_name[32];
+ char sysfs_name[48];
char debugfs_name[32];
};
+struct ras_err_info {
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ u64 ce_count;
+ u64 ue_count;
+ u64 de_count;
+};
+
+struct ras_err_node {
+ struct list_head node;
+ struct ras_err_info err_info;
+};
+
struct ras_err_data {
unsigned long ue_count;
unsigned long ce_count;
+ unsigned long de_count;
unsigned long err_addr_cnt;
struct eeprom_table_record *err_addr;
+ unsigned long err_addr_len;
+ u32 err_list_count;
+ struct list_head err_node_list;
};
+#define for_each_ras_error(err_node, err_data) \
+ list_for_each_entry(err_node, &(err_data)->err_node_list, node)
+
struct ras_err_handler_data {
/* point to bad page records array */
struct eeprom_table_record *bps;
/* the count of entries */
int count;
+ int count_saved;
/* the space can place new entries */
int space_left;
};
@@ -395,8 +691,6 @@ struct ras_manager {
struct list_head node;
/* the device */
struct amdgpu_device *adev;
- /* debugfs */
- struct dentry *ent;
/* sysfs */
struct device_attribute sysfs_attr;
int attr_inuse;
@@ -408,6 +702,8 @@ struct ras_manager {
struct ras_ih_data ih_data;
struct ras_err_data err_data;
+
+ struct aca_handle aca_handle;
};
struct ras_badpage {
@@ -419,7 +715,7 @@ struct ras_badpage {
/* interfaces for IP */
struct ras_fs_if {
struct ras_common_if head;
- char sysfs_name[32];
+ const char* sysfs_name;
char debugfs_name[32];
};
@@ -427,12 +723,14 @@ struct ras_query_if {
struct ras_common_if head;
unsigned long ue_count;
unsigned long ce_count;
+ unsigned long de_count;
};
struct ras_inject_if {
struct ras_common_if head;
uint64_t address;
uint64_t value;
+ uint32_t instance_mask;
};
struct ras_cure_if {
@@ -457,6 +755,30 @@ struct ras_debug_if {
};
int op;
};
+
+struct amdgpu_ras_block_object {
+ struct ras_common_if ras_comm;
+
+ int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block, uint32_t sub_block_index);
+ int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ ras_ih_cb ras_cb;
+ const struct amdgpu_ras_block_hw_ops *hw_ops;
+};
+
+struct amdgpu_ras_block_hw_ops {
+ int (*ras_error_inject)(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask);
+ void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
+ void (*query_ras_error_status)(struct amdgpu_device *adev);
+ void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
+ void (*reset_ras_error_count)(struct amdgpu_device *adev);
+ void (*reset_ras_error_status)(struct amdgpu_device *adev);
+ bool (*query_poison_status)(struct amdgpu_device *adev);
+ bool (*handle_poison_consumption)(struct amdgpu_device *adev);
+};
+
/* work flow
* vbios
* 1: ras feature enable (enabled by default)
@@ -471,46 +793,23 @@ struct ras_debug_if {
* 8: feature disable
*/
-#define amdgpu_ras_get_context(adev) ((adev)->psp.ras.ras)
-#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras.ras = (ras_con))
-
-/* check if ras is supported on block, say, sdma, gfx */
-static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
- unsigned int block)
-{
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
- if (block >= AMDGPU_RAS_BLOCK_COUNT)
- return 0;
- return ras && (ras->supported & (1 << block));
-}
-
-int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
-int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
- unsigned int block);
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev);
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info);
void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);
-unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
- bool is_ce);
-
-bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev);
+int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info);
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages);
+ struct eeprom_table_record *bps, int pages, bool from_rom);
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
-
-static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
-{
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
- if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
- schedule_work(&ras->recovery_work);
- return 0;
-}
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt);
static inline enum ta_ras_block
amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
@@ -543,6 +842,18 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
return TA_RAS_BLOCK__MP1;
case AMDGPU_RAS_BLOCK__FUSE:
return TA_RAS_BLOCK__FUSE;
+ case AMDGPU_RAS_BLOCK__MCA:
+ return TA_RAS_BLOCK__MCA;
+ case AMDGPU_RAS_BLOCK__VCN:
+ return TA_RAS_BLOCK__VCN;
+ case AMDGPU_RAS_BLOCK__JPEG:
+ return TA_RAS_BLOCK__JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return TA_RAS_BLOCK__IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return TA_RAS_BLOCK__MPIO;
+ case AMDGPU_RAS_BLOCK__MMSCH:
+ return TA_RAS_BLOCK__MMSCH;
default:
WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
return TA_RAS_BLOCK__UMC;
@@ -570,15 +881,15 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
/* called in ip_init and ip_fini */
int amdgpu_ras_init(struct amdgpu_device *adev);
+int amdgpu_ras_late_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
-int amdgpu_ras_late_init(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_fs_if *fs_info,
- struct ras_ih_if *ih_info);
-void amdgpu_ras_late_fini(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_ih_if *ih_info);
+
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
struct ras_common_if *head, bool enable);
@@ -587,24 +898,29 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
struct ras_common_if *head, bool enable);
int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
- struct ras_fs_if *head);
+ struct ras_common_if *head);
int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
struct ras_common_if *head);
void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
-int amdgpu_ras_error_query(struct amdgpu_device *adev,
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
struct ras_query_if *info);
+int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info);
int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
- struct ras_ih_if *info);
+ struct ras_common_if *head);
int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
- struct ras_ih_if *info);
+ struct ras_common_if *head);
int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info);
@@ -624,9 +940,108 @@ static inline void amdgpu_ras_intr_cleared(void)
atomic_set(&amdgpu_ras_in_intr, 0);
}
-void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
+
+void amdgpu_release_ras_context(struct amdgpu_device *adev);
+
+int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev);
+
+const char *get_ras_block_str(struct ras_common_if *ras_block);
+
+bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
+
+int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block);
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
+
+struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
+
+int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
+
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable);
+bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev);
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+ unsigned int *mode);
+
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+ struct amdgpu_ras_block_object *ras_block_obj);
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name);
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id);
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt);
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count);
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance);
+
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data);
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data);
+int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances);
+int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ const struct aca_info *aca_info, void *data);
+int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk);
+
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+ struct aca_handle *handle, char *buf, void *data);
+
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status);
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev);
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev);
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block);
+
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type);
+int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
+ const void *caller);
+
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn);
+
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr);
+
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev);
+
+__printf(3, 4)
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+ const char *fmt, ...);
+
+bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 19d9aa76cfbf..64dd7a81bff5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -26,97 +26,221 @@
#include "amdgpu_ras.h"
#include <linux/bits.h>
#include "atom.h"
+#include "amdgpu_eeprom.h"
+#include "amdgpu_atomfirmware.h"
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
-#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0
-#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8
-#define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0
-#define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID 0xA0
+#include "amdgpu_reset.h"
+#include "amdgpu_ras_mgr.h"
+
+/* These are memory addresses as would be seen by one or more EEPROM
+ * chips strung on the I2C bus, usually by manipulating pins 1-3 of a
+ * set of EEPROM devices. They form a continuous memory space.
+ *
+ * The I2C device address includes the device type identifier, 1010b,
+ * which is a reserved value and indicates that this is an I2C EEPROM
+ * device. It also includes the top 3 bits of the 19 bit EEPROM memory
+ * address, namely bits 18, 17, and 16. This makes up the 7 bit
+ * address sent on the I2C bus with bit 0 being the direction bit,
+ * which is not represented here, and sent by the hardware directly.
+ *
+ * For instance,
+ * 50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0.
+ * 54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h.
+ * 56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h.
+ * Depending on the size of the I2C EEPROM device(s), bits 18:16 may
+ * address memory in a device or a device on the I2C bus, depending on
+ * the status of pins 1-3. See top of amdgpu_eeprom.c.
+ *
+ * The RAS table lives either at address 0 or address 40000h of EEPROM.
+ */
+#define EEPROM_I2C_MADDR_0 0x0
+#define EEPROM_I2C_MADDR_4 0x40000
/*
- * The 2 macros bellow represent the actual size in bytes that
+ * The 2 macros below represent the actual size in bytes that
* those entities occupy in the EEPROM memory.
- * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
+ * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
* uses uint64 to store 6b fields such as retired_page.
*/
-#define EEPROM_TABLE_HEADER_SIZE 20
-#define EEPROM_TABLE_RECORD_SIZE 24
-
-#define EEPROM_ADDRESS_SIZE 0x2
+#define RAS_TABLE_HEADER_SIZE 20
+#define RAS_TABLE_RECORD_SIZE 24
/* Table hdr is 'AMDR' */
-#define EEPROM_TABLE_HDR_VAL 0x414d4452
-#define EEPROM_TABLE_VER 0x00010000
+#define RAS_TABLE_HDR_VAL 0x414d4452
/* Bad GPU tag ‘BADG’ */
-#define EEPROM_TABLE_HDR_BAD 0x42414447
+#define RAS_TABLE_HDR_BAD 0x42414447
-/* Assume 2 Mbit size */
-#define EEPROM_SIZE_BYTES 256000
-#define EEPROM_PAGE__SIZE_BYTES 256
-#define EEPROM_HDR_START 0
-#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE)
-#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE)
-#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8)
+/*
+ * EEPROM Table structure v1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
-#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
+/* Assume 2-Mbit size EEPROM and take up the whole space. */
+#define RAS_TBL_SIZE_BYTES (256 * 1024)
+#define RAS_TABLE_START 0
+#define RAS_HDR_START RAS_TABLE_START
+#define RAS_RECORD_START (RAS_HDR_START + RAS_TABLE_HEADER_SIZE)
+#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
-static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
-{
- if ((adev->asic_type == CHIP_VEGA20) ||
- (adev->asic_type == CHIP_ARCTURUS) ||
- (adev->asic_type == CHIP_SIENNA_CICHLID))
- return true;
+/*
+ * EEPROM Table structrue v2.1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE RAS INFO |
+ * | (available info size 4 Bytes) |
+ * | ( reserved size 252 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
- return false;
-}
+/* EEPROM Table V2_1 */
+#define RAS_TABLE_V2_1_INFO_SIZE 256
+#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
+#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
+ RAS_TABLE_V2_1_INFO_SIZE)
+#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
+
+#define RAS_SMU_MESSAGE_TIMEOUT_MS 1000 /* 1s */
+
+/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
+ * offset off of RAS_TABLE_START. That is, this is something you can
+ * add to control->i2c_address, and then tell I2C layer to read
+ * from/write to there. _N is the so called absolute index,
+ * because it starts right after the table header.
+ */
+#define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \
+ (_N) * RAS_TABLE_RECORD_SIZE)
-static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
- uint16_t *i2c_addr)
-{
- struct atom_context *atom_ctx = adev->mode_info.atom_context;
+#define RAS_OFFSET_TO_INDEX(_C, _O) (((_O) - \
+ (_C)->ras_record_offset) / RAS_TABLE_RECORD_SIZE)
- if (!i2c_addr || !atom_ctx)
- return false;
+/* Given a 0-based relative record index, 0, 1, 2, ..., etc., off
+ * of "fri", return the absolute record index off of the end of
+ * the table header.
+ */
+#define RAS_RI_TO_AI(_C, _I) (((_I) + (_C)->ras_fri) % \
+ (_C)->ras_max_record_count)
- if (strnstr(atom_ctx->vbios_version,
- "D342",
- sizeof(atom_ctx->vbios_version)))
- *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342;
- else
- *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS;
+#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
+
+#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
- return true;
+#define to_amdgpu_device(x) ((container_of(x, struct amdgpu_ras, eeprom_control))->adev)
+
+static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */
+ case IP_VERSION(11, 0, 7): /* Sienna cichlid */
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 2): /* Aldebaran */
+ case IP_VERSION(13, 0, 10):
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ return (adev->gmc.is_app_apu) ? false : true;
+ default:
+ return false;
+ }
}
static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
- uint16_t *i2c_addr)
+ struct amdgpu_ras_eeprom_control *control)
{
- if (!i2c_addr)
- return false;
+ struct atom_context *atom_ctx = adev->mode_info.atom_context;
+ u8 i2c_addr;
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- *i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20;
- break;
+ if (!control)
+ return false;
- case CHIP_ARCTURUS:
- return __get_eeprom_i2c_addr_arct(adev, i2c_addr);
+ if (adev->bios && amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
+ /* The address given by VBIOS is an 8-bit, wire-format
+ * address, i.e. the most significant byte.
+ *
+ * Normalize it to a 19-bit EEPROM address. Remove the
+ * device type identifier and make it a 7-bit address;
+ * then make it a 19-bit EEPROM address. See top of
+ * amdgpu_eeprom.c.
+ */
+ i2c_addr = (i2c_addr & 0x0F) >> 1;
+ control->i2c_address = ((u32) i2c_addr) << 16;
- case CHIP_SIENNA_CICHLID:
- *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID;
- break;
+ return true;
+ }
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2):
+ /* VEGA20 and ARCTURUS */
+ if (adev->asic_type == CHIP_VEGA20)
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else if (strnstr(atom_ctx->vbios_pn,
+ "D342",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ case IP_VERSION(11, 0, 7):
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ return true;
+ case IP_VERSION(13, 0, 2):
+ if (strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ return true;
+ case IP_VERSION(13, 0, 0):
+ if (strnstr(atom_ctx->vbios_pn, "D707",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
default:
return false;
}
-
- return true;
}
-static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr,
- unsigned char *buff)
+static void
+__encode_table_header_to_buf(struct amdgpu_ras_eeprom_table_header *hdr,
+ unsigned char *buf)
{
- uint32_t *pp = (uint32_t *) buff;
+ u32 *pp = (uint32_t *)buf;
pp[0] = cpu_to_le32(hdr->header);
pp[1] = cpu_to_le32(hdr->version);
@@ -125,10 +249,11 @@ static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header
pp[4] = cpu_to_le32(hdr->checksum);
}
-static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr,
- unsigned char *buff)
+static void
+__decode_table_header_from_buf(struct amdgpu_ras_eeprom_table_header *hdr,
+ unsigned char *buf)
{
- uint32_t *pp = (uint32_t *)buff;
+ u32 *pp = (uint32_t *)buf;
hdr->header = le32_to_cpu(pp[0]);
hdr->version = le32_to_cpu(pp[1]);
@@ -137,527 +262,1669 @@ static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_heade
hdr->checksum = le32_to_cpu(pp[4]);
}
-static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
- unsigned char *buff)
+static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
{
- int ret = 0;
+ u8 buf[RAS_TABLE_HEADER_SIZE];
struct amdgpu_device *adev = to_amdgpu_device(control);
- struct i2c_msg msg = {
- .addr = 0,
- .flags = 0,
- .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
- .buf = buff,
- };
-
+ int res;
- *(uint16_t *)buff = EEPROM_HDR_START;
- __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE);
-
- msg.addr = control->i2c_address;
+ memset(buf, 0, sizeof(buf));
+ __encode_table_header_to_buf(&control->tbl_hdr, buf);
/* i2c may be unstable in gpu reset */
- down_read(&adev->reset_sem);
- ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
- up_read(&adev->reset_sem);
-
- if (ret < 1)
- DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret);
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_header_offset,
+ buf, RAS_TABLE_HEADER_SIZE);
+ up_read(&adev->reset_domain->sem);
+
+ if (res < 0) {
+ dev_err(adev->dev, "Failed to write EEPROM table header:%d",
+ res);
+ } else if (res < RAS_TABLE_HEADER_SIZE) {
+ dev_err(adev->dev, "Short write:%d out of %d\n", res,
+ RAS_TABLE_HEADER_SIZE);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
- return ret;
+ return res;
}
-static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
+static void
+__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+ unsigned char *buf)
{
- int i;
- uint32_t tbl_sum = 0;
+ u32 *pp = (uint32_t *)buf;
+ u32 tmp;
- /* Header checksum, skip checksum field in the calculation */
- for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
- tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
+ tmp = ((uint32_t)(rai->rma_status) & 0xFF) |
+ (((uint32_t)(rai->health_percent) << 8) & 0xFF00) |
+ (((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000);
+ pp[0] = cpu_to_le32(tmp);
+}
+
+static void
+__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+ u32 tmp;
- return tbl_sum;
+ tmp = le32_to_cpu(pp[0]);
+ rai->rma_status = tmp & 0xFF;
+ rai->health_percent = (tmp >> 8) & 0xFF;
+ rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF;
}
-static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records,
- int num)
+static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
{
- int i, j;
- uint32_t tbl_sum = 0;
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u8 *buf;
+ int res;
- /* Records checksum */
- for (i = 0; i < num; i++) {
- struct eeprom_table_record *record = &records[i];
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "Failed to alloc buf to write table ras info\n");
+ return -ENOMEM;
+ }
- for (j = 0; j < sizeof(*record); j++) {
- tbl_sum += *(((unsigned char *)record) + j);
- }
+ __encode_table_ras_info_to_buf(&control->tbl_rai, buf);
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ up_read(&adev->reset_domain->sem);
+
+ if (res < 0) {
+ dev_err(adev->dev, "Failed to write EEPROM table ras info:%d",
+ res);
+ } else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ dev_err(adev->dev, "Short write:%d out of %d\n", res,
+ RAS_TABLE_V2_1_INFO_SIZE);
+ res = -EIO;
+ } else {
+ res = 0;
}
- return tbl_sum;
-}
+ kfree(buf);
-static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num)
-{
- return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
+ return res;
}
-/* Checksum = 256 -((sum of all table entries) mod 256) */
-static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num,
- uint32_t old_hdr_byte_sum)
+static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
{
- /*
- * This will update the table sum with new records.
- *
- * TODO: What happens when the EEPROM table is to be wrapped around
- * and old records from start will get overridden.
- */
+ int ii;
+ u8 *pp, csum;
+ size_t sz;
- /* need to recalculate updated header byte sum */
- control->tbl_byte_sum -= old_hdr_byte_sum;
- control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
+ /* Header checksum, skip checksum field in the calculation */
+ sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum);
+ pp = (u8 *) &control->tbl_hdr;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
- control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
+ return csum;
}
-/* table sum mod 256 + checksum must equals 256 */
-static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num)
+static u8 __calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control)
{
- control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
+ int ii;
+ u8 *pp, csum;
+ size_t sz;
- if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
- DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
- return false;
- }
+ sz = sizeof(control->tbl_rai);
+ pp = (u8 *) &control->tbl_rai;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
- return true;
+ return csum;
}
static int amdgpu_ras_eeprom_correct_header_tag(
- struct amdgpu_ras_eeprom_control *control,
- uint32_t header)
+ struct amdgpu_ras_eeprom_control *control,
+ uint32_t header)
{
- unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE];
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
- int ret = 0;
-
- memset(buff, 0, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE);
-
- mutex_lock(&control->tbl_mutex);
+ u8 *hh;
+ int res;
+ u8 csum;
+
+ csum = -hdr->checksum;
+
+ hh = (void *) &hdr->header;
+ csum -= (hh[0] + hh[1] + hh[2] + hh[3]);
+ hh = (void *) &header;
+ csum += hh[0] + hh[1] + hh[2] + hh[3];
+ csum = -csum;
+ mutex_lock(&control->ras_tbl_mutex);
hdr->header = header;
- ret = __update_table_header(control, buff);
- mutex_unlock(&control->tbl_mutex);
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ mutex_unlock(&control->ras_tbl_mutex);
- return ret;
+ return res;
}
-int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
+static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control)
{
- unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_device *adev = to_amdgpu_device(control);
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
- int ret = 0;
-
- mutex_lock(&control->tbl_mutex);
-
- hdr->header = EEPROM_TABLE_HDR_VAL;
- hdr->version = EEPROM_TABLE_VER;
- hdr->first_rec_offset = EEPROM_RECORD_START;
- hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
-
- control->tbl_byte_sum = 0;
- __update_tbl_checksum(control, NULL, 0, 0);
- control->next_addr = EEPROM_RECORD_START;
-
- ret = __update_table_header(control, buff);
-
- mutex_unlock(&control->tbl_mutex);
-
- return ret;
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 10, 0):
+ hdr->version = RAS_TABLE_VER_V2_1;
+ return;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 5, 0):
+ hdr->version = RAS_TABLE_VER_V3;
+ return;
+ default:
+ hdr->version = RAS_TABLE_VER_V1;
+ return;
+ }
}
-int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit)
+/**
+ * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table
+ * @control: pointer to control structure
+ *
+ * Reset the contents of the header of the RAS EEPROM table.
+ * Return 0 on success, -errno on error.
+ */
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
{
- int ret = 0;
struct amdgpu_device *adev = to_amdgpu_device(control);
- unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- struct i2c_msg msg = {
- .addr = 0,
- .flags = I2C_M_RD,
- .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
- .buf = buff,
- };
-
- *exceed_err_limit = false;
-
- if (!__is_ras_eeprom_supported(adev))
- return 0;
-
- /* Verify i2c adapter is initialized */
- if (!adev->pm.smu_i2c.algo)
- return -ENOENT;
-
- if (!__get_eeprom_i2c_addr(adev, &control->i2c_address))
- return -EINVAL;
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ u32 erase_res = 0;
+ u8 csum;
+ int res;
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ hdr->header = RAS_TABLE_HDR_VAL;
+ amdgpu_ras_set_eeprom_table_version(control);
+
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ hdr->first_rec_offset = RAS_RECORD_START_V2_1;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE;
+ rai->rma_status = GPU_HEALTH_USABLE;
+
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ /**
+ * GPU health represented as a percentage.
+ * 0 means worst health, 100 means fully health.
+ */
+ rai->health_percent = 100;
+ /* ecc_page_threshold = 0 means disable bad page retirement */
+ rai->ecc_page_threshold = con->bad_page_cnt_threshold;
+ } else {
+ hdr->first_rec_offset = RAS_RECORD_START;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
- mutex_init(&control->tbl_mutex);
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ }
- msg.addr = control->i2c_address;
- /* Read/Create table header from EEPROM address 0 */
- ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
- if (ret < 1) {
- DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret);
- return ret;
+ csum = __calc_hdr_byte_sum(control);
+ if (hdr->version >= RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ csum = -csum;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ if (!res && hdr->version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
+ } else {
+ res = amdgpu_ras_smu_erase_ras_table(adev, &erase_res);
+ if (res || erase_res) {
+ dev_warn(adev->dev, "RAS EEPROM reset failed, res:%d result:%d",
+ res, erase_res);
+ if (!res)
+ res = -EIO;
+ }
}
- __decode_table_header_from_buff(hdr, &buff[2]);
+ control->ras_num_recs = 0;
+ control->ras_num_bad_pages = 0;
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+ control->ras_fri = 0;
- if (hdr->header == EEPROM_TABLE_HDR_VAL) {
- control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
- EEPROM_TABLE_RECORD_SIZE;
- control->tbl_byte_sum = __calc_hdr_byte_sum(control);
- control->next_addr = EEPROM_RECORD_START;
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages);
- DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
- control->num_recs);
+ control->bad_channel_bitmap = 0;
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
+ con->update_channel_flag = false;
- } else if ((hdr->header == EEPROM_TABLE_HDR_BAD) &&
- (amdgpu_bad_page_threshold != 0)) {
- if (ras->bad_page_cnt_threshold > control->num_recs) {
- dev_info(adev->dev, "Using one valid bigger bad page "
- "threshold and correcting eeprom header tag.\n");
- ret = amdgpu_ras_eeprom_correct_header_tag(control,
- EEPROM_TABLE_HDR_VAL);
- } else {
- *exceed_err_limit = true;
- dev_err(adev->dev, "Exceeding the bad_page_threshold parameter, "
- "disabling the GPU.\n");
- }
- } else {
- DRM_INFO("Creating new EEPROM table");
+ amdgpu_ras_debugfs_set_ret_size(control);
- ret = amdgpu_ras_eeprom_reset_table(control);
- }
+ mutex_unlock(&control->ras_tbl_mutex);
- return ret == 1 ? 0 : -EIO;
+ return res;
}
-static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *record,
- unsigned char *buff)
+static void
+__encode_table_record_to_buf(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ unsigned char *buf)
{
__le64 tmp = 0;
int i = 0;
/* Next are all record fields according to EEPROM page spec in LE foramt */
- buff[i++] = record->err_type;
+ buf[i++] = record->err_type;
- buff[i++] = record->bank;
+ buf[i++] = record->bank;
tmp = cpu_to_le64(record->ts);
- memcpy(buff + i, &tmp, 8);
+ memcpy(buf + i, &tmp, 8);
i += 8;
tmp = cpu_to_le64((record->offset & 0xffffffffffff));
- memcpy(buff + i, &tmp, 6);
+ memcpy(buf + i, &tmp, 6);
i += 6;
- buff[i++] = record->mem_channel;
- buff[i++] = record->mcumc_id;
+ buf[i++] = record->mem_channel;
+ buf[i++] = record->mcumc_id;
tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
- memcpy(buff + i, &tmp, 6);
+ memcpy(buf + i, &tmp, 6);
}
-static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *record,
- unsigned char *buff)
+static void
+__decode_table_record_from_buf(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ unsigned char *buf)
{
__le64 tmp = 0;
int i = 0;
/* Next are all record fields according to EEPROM page spec in LE foramt */
- record->err_type = buff[i++];
+ record->err_type = buf[i++];
- record->bank = buff[i++];
+ record->bank = buf[i++];
- memcpy(&tmp, buff + i, 8);
+ memcpy(&tmp, buf + i, 8);
record->ts = le64_to_cpu(tmp);
i += 8;
- memcpy(&tmp, buff + i, 6);
+ memcpy(&tmp, buf + i, 6);
record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
i += 6;
- record->mem_channel = buff[i++];
- record->mcumc_id = buff[i++];
+ record->mem_channel = buf[i++];
+ record->mcumc_id = buf[i++];
- memcpy(&tmp, buff + i, 6);
+ memcpy(&tmp, buf + i, 6);
record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
}
-/*
- * When reaching end of EEPROM memory jump back to 0 record address
- * When next record access will go beyond EEPROM page boundary modify bits A17/A8
- * in I2C selector to go to next page
+bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_check_eeprom_safety_watermark(adev);
+
+ if (!__is_ras_eeprom_supported(adev) ||
+ !amdgpu_bad_page_threshold)
+ return false;
+
+ /* skip check eeprom table for VEGA20 Gaming */
+ if (!con)
+ return false;
+ else
+ if (!(con->features & BIT(AMDGPU_RAS_BLOCK__UMC)))
+ return false;
+
+ if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
+ if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
+ con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n");
+ return false;
+ } else {
+ dev_warn(adev->dev,
+ "Please consider adjusting the customized threshold.\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * __amdgpu_ras_eeprom_write -- write indexed from buffer to EEPROM
+ * @control: pointer to control structure
+ * @buf: pointer to buffer containing data to write
+ * @fri: start writing at this index
+ * @num: number of records to write
+ *
+ * The caller must hold the table mutex in @control.
+ * Return 0 on success, -errno otherwise.
*/
-static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
+static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
+ u8 *buf, const u32 fri, const u32 num)
{
- uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE;
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u32 buf_size;
+ int res;
- /* When all EEPROM memory used jump back to 0 address */
- if (next_address > EEPROM_SIZE_BYTES) {
- DRM_INFO("Reached end of EEPROM memory, jumping to 0 "
- "and overriding old record");
- return EEPROM_RECORD_START;
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ buf_size = num * RAS_TABLE_RECORD_SIZE;
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ RAS_INDEX_TO_OFFSET(control, fri),
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ dev_err(adev->dev, "Writing %d EEPROM table records error:%d",
+ num, res);
+ } else if (res < buf_size) {
+ /* Short write, return error.
+ */
+ dev_err(adev->dev, "Wrote %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
+ res = -EIO;
+ } else {
+ res = 0;
}
- /*
- * To check if we overflow page boundary compare next address with
- * current and see if bits 17/8 of the EEPROM address will change
- * If they do start from the next 256b page
- *
- * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 5.1.2
+ return res;
+}
+
+static int
+amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u32 a, b, i;
+ u8 *buf, *pp;
+ int res;
+
+ buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Encode all of them in one go.
*/
- if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) {
- DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx",
- (next_address & EEPROM_ADDR_MSB_MASK));
+ pp = buf;
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
+ __encode_table_record_to_buf(control, &record[i], pp);
+
+ /* update bad channel bitmap */
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ con->update_channel_flag = true;
+ }
+ }
- return (next_address & EEPROM_ADDR_MSB_MASK);
+ /* a, first record index to write into.
+ * b, last record index to write into.
+ * a = first index to read (fri) + number of records in the table,
+ * b = a + @num - 1.
+ * Let N = control->ras_max_num_record_count, then we have,
+ * case 0: 0 <= a <= b < N,
+ * just append @num records starting at a;
+ * case 1: 0 <= a < N <= b,
+ * append (N - a) records starting at a, and
+ * append the remainder, b % N + 1, starting at 0.
+ * case 2: 0 <= fri < N <= a <= b, then modulo N we get two subcases,
+ * case 2a: 0 <= a <= b < N
+ * append num records starting at a; and fix fri if b overwrote it,
+ * and since a <= b, if b overwrote it then a must've also,
+ * and if b didn't overwrite it, then a didn't also.
+ * case 2b: 0 <= b < a < N
+ * write num records starting at a, which wraps around 0=N
+ * and overwrite fri unconditionally. Now from case 2a,
+ * this means that b eclipsed fri to overwrite it and wrap
+ * around 0 again, i.e. b = 2N+r pre modulo N, so we unconditionally
+ * set fri = b + 1 (mod N).
+ * Now, since fri is updated in every case, except the trivial case 0,
+ * the number of records present in the table after writing, is,
+ * num_recs - 1 = b - fri (mod N), and we take the positive value,
+ * by adding an arbitrary multiple of N before taking the modulo N
+ * as shown below.
+ */
+ a = control->ras_fri + control->ras_num_recs;
+ b = a + num - 1;
+ if (b < control->ras_max_record_count) {
+ res = __amdgpu_ras_eeprom_write(control, buf, a, num);
+ } else if (a < control->ras_max_record_count) {
+ u32 g0, g1;
+
+ g0 = control->ras_max_record_count - a;
+ g1 = b % control->ras_max_record_count + 1;
+ res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
+ if (res)
+ goto Out;
+ res = __amdgpu_ras_eeprom_write(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ if (g1 > control->ras_fri)
+ control->ras_fri = g1 % control->ras_max_record_count;
+ } else {
+ a %= control->ras_max_record_count;
+ b %= control->ras_max_record_count;
+
+ if (a <= b) {
+ /* Note that, b - a + 1 = num. */
+ res = __amdgpu_ras_eeprom_write(control, buf, a, num);
+ if (res)
+ goto Out;
+ if (b >= control->ras_fri)
+ control->ras_fri = (b + 1) % control->ras_max_record_count;
+ } else {
+ u32 g0, g1;
+
+ /* b < a, which means, we write from
+ * a to the end of the table, and from
+ * the start of the table to b.
+ */
+ g0 = control->ras_max_record_count - a;
+ g1 = b + 1;
+ res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
+ if (res)
+ goto Out;
+ res = __amdgpu_ras_eeprom_write(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ control->ras_fri = g1 % control->ras_max_record_count;
+ }
}
+ control->ras_num_recs = 1 + (control->ras_max_record_count + b
+ - control->ras_fri)
+ % control->ras_max_record_count;
+
+ /*old asics only save pa to eeprom like before*/
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12)
+ control->ras_num_pa_recs += num;
+ else
+ control->ras_num_mca_recs += num;
- return curr_address;
+ control->ras_num_bad_pages = con->bad_page_num;
+Out:
+ kfree(buf);
+ return res;
}
-int amdgpu_ras_eeprom_check_err_threshold(
- struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit)
+static int
+amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
- unsigned char buff[EEPROM_ADDRESS_SIZE +
- EEPROM_TABLE_HEADER_SIZE] = { 0 };
- struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
- struct i2c_msg msg = {
- .addr = control->i2c_address,
- .flags = I2C_M_RD,
- .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
- .buf = buff,
- };
- int ret;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ u8 *buf, *pp, csum;
+ u32 buf_size;
+ int res;
+
+ /* Modify the header if it exceeds.
+ */
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->ras_num_bad_pages > ras->bad_page_cnt_threshold) {
+ dev_warn(adev->dev,
+ "Saved bad pages %d reaches threshold value %d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+
+ if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) &&
+ amdgpu_cper_generate_bp_threshold_record(adev))
+ dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2)) {
+ control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) {
+ control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
+ control->tbl_rai.health_percent = 0;
+ }
+ ras->is_rma = true;
+ }
+
+ /* ignore the -ENOTSUPP return value */
+ amdgpu_dpm_send_rma_reason(adev);
+ }
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ control->tbl_hdr.checksum = 0;
+
+ buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "allocating memory for table of size %d bytes failed\n",
+ control->tbl_hdr.tbl_size);
+ res = -ENOMEM;
+ goto Out;
+ }
+
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_record_offset,
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ dev_err(adev->dev, "EEPROM failed reading records:%d\n", res);
+ goto Out;
+ } else if (res < buf_size) {
+ dev_err(adev->dev, "EEPROM read %d out of %d bytes\n", res,
+ buf_size);
+ res = -EIO;
+ goto Out;
+ }
+
+ /**
+ * bad page records have been stored in eeprom,
+ * now calculate gpu health percent
+ */
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 &&
+ control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
+ control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
+ control->ras_num_bad_pages) * 100) /
+ ras->bad_page_cnt_threshold;
+
+ /* Recalc the checksum.
+ */
+ csum = 0;
+ for (pp = buf; pp < buf + buf_size; pp++)
+ csum += *pp;
+
+ csum += __calc_hdr_byte_sum(control);
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ /* avoid sign extension when assigning to "checksum" */
+ csum = -csum;
+ control->tbl_hdr.checksum = csum;
+ res = __write_table_header(control);
+ if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
+Out:
+ kfree(buf);
+ return res;
+}
+
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int ret, retry = 20;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
+ control->ras_num_recs_old = control->ras_num_recs;
- *exceed_err_limit = false;
+ do {
+ /* 1000ms timeout is long enough, smu_get_badpage_count won't
+ * return -EBUSY before timeout.
+ */
+ ret = amdgpu_ras_smu_get_badpage_count(adev,
+ &(control->ras_num_recs), RAS_SMU_MESSAGE_TIMEOUT_MS);
+ if (!ret &&
+ (control->ras_num_recs_old == control->ras_num_recs)) {
+ /* record number update in PMFW needs some time,
+ * smu_get_badpage_count may return immediately without
+ * count update, sleep for a while and retry again.
+ */
+ msleep(50);
+ retry--;
+ } else {
+ break;
+ }
+ } while (retry);
+
+ /* no update of record number is not a real failure,
+ * don't print warning here
+ */
+ if (!ret && (control->ras_num_recs_old == control->ras_num_recs))
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static int amdgpu_ras_smu_eeprom_append(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev) || !con)
+ return 0;
+
+ control->ras_num_bad_pages = con->bad_page_num;
+
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->ras_num_bad_pages > con->bad_page_cnt_threshold) {
+ dev_warn(adev->dev,
+ "Saved bad pages %d reaches threshold value %d\n",
+ control->ras_num_bad_pages, con->bad_page_cnt_threshold);
+
+ if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
+ dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2))
+ con->is_rma = true;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
+ * @control: pointer to control structure
+ * @record: array of records to append
+ * @num: number of records in @record array
+ *
+ * Append @num records to the table, calculate the checksum and write
+ * the table back to EEPROM. The maximum number of records that
+ * can be appended is between 1 and control->ras_max_record_count,
+ * regardless of how many records are already stored in the table.
+ *
+ * Return 0 on success or if EEPROM is not supported, -errno on error.
+ */
+int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int res, i;
+ uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
if (!__is_ras_eeprom_supported(adev))
return 0;
- /* read EEPROM table header */
- mutex_lock(&control->tbl_mutex);
- ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
- if (ret < 1) {
- dev_err(adev->dev, "Failed to read EEPROM table header.\n");
- goto err;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_append(control);
+
+ if (num == 0) {
+ dev_err(adev->dev, "will not append 0 records\n");
+ return -EINVAL;
+ } else if (num > control->ras_max_record_count) {
+ dev_err(adev->dev,
+ "cannot append %d records than the size of table %d\n",
+ num, control->ras_max_record_count);
+ return -EINVAL;
+ }
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* set the new channel index flag */
+ for (i = 0; i < num; i++)
+ record[i].retired_page |= (nps << UMC_NPS_SHIFT);
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ res = amdgpu_ras_eeprom_append_table(control, record, num);
+ if (!res)
+ res = amdgpu_ras_eeprom_update_header(control);
+ if (!res)
+ amdgpu_ras_debugfs_set_ret_size(control);
+
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ /* clear channel index flag, the flag is only saved on eeprom */
+ for (i = 0; i < num; i++)
+ record[i].retired_page &= ~(nps << UMC_NPS_SHIFT);
+
+ return res;
+}
+
+/**
+ * __amdgpu_ras_eeprom_read -- read indexed from EEPROM into buffer
+ * @control: pointer to control structure
+ * @buf: pointer to buffer to read into
+ * @fri: first record index, start reading at this index, absolute index
+ * @num: number of records to read
+ *
+ * The caller must hold the table mutex in @control.
+ * Return 0 on success, -errno otherwise.
+ */
+static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ u8 *buf, const u32 fri, const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u32 buf_size;
+ int res;
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ buf_size = num * RAS_TABLE_RECORD_SIZE;
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ RAS_INDEX_TO_OFFSET(control, fri),
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ dev_err(adev->dev, "Reading %d EEPROM table records error:%d",
+ num, res);
+ } else if (res < buf_size) {
+ /* Short read, return error.
+ */
+ dev_err(adev->dev, "Read %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
+ res = -EIO;
+ } else {
+ res = 0;
}
- __decode_table_header_from_buff(hdr, &buff[2]);
+ return res;
+}
+
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint64_t ts, end_idx;
+ int i, ret;
+ u64 mca, ipid;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
- if (hdr->header == EEPROM_TABLE_HDR_BAD) {
- dev_warn(adev->dev, "This GPU is in BAD status.");
- dev_warn(adev->dev, "Please retire it or setting one bigger "
- "threshold value when reloading driver.\n");
- *exceed_err_limit = true;
+ if (!adev->umc.ras || !adev->umc.ras->mca_ipid_parse)
+ return -EOPNOTSUPP;
+
+ end_idx = rec_idx + num;
+ for (i = rec_idx; i < end_idx; i++) {
+ ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, i, &mca);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_smu_get_badpage_ipid(adev, i, &ipid);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_smu_get_timestamp(adev, i, &ts);
+ if (ret)
+ return ret;
+
+ record[i - rec_idx].address = mca;
+ /* retired_page (pa) is unused now */
+ record[i - rec_idx].retired_page = 0x1ULL;
+ record[i - rec_idx].ts = ts;
+ record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+
+ adev->umc.ras->mca_ipid_parse(adev, ipid,
+ (uint32_t *)&(record[i - rec_idx].cu),
+ (uint32_t *)&(record[i - rec_idx].mem_channel),
+ (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL);
}
-err:
- mutex_unlock(&control->tbl_mutex);
return 0;
}
-int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records,
- bool write,
- int num)
+/**
+ * amdgpu_ras_eeprom_read -- read EEPROM
+ * @control: pointer to control structure
+ * @record: array of records to read into
+ * @num: number of records in @record
+ *
+ * Reads num records from the RAS table in EEPROM and
+ * writes the data into @record array.
+ *
+ * Returns 0 on success, -errno on error.
+ */
+int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
{
- int i, ret = 0;
- struct i2c_msg *msgs, *msg;
- unsigned char *buffs, *buff;
- struct eeprom_table_record *record;
struct amdgpu_device *adev = to_amdgpu_device(control);
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int i, res;
+ u8 *buf, *pp;
+ u32 g0, g1;
+
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_eeprom_read_idx(control, record, 0, num);
if (!__is_ras_eeprom_supported(adev))
return 0;
- buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,
- GFP_KERNEL);
- if (!buffs)
+ if (num == 0) {
+ dev_err(adev->dev, "will not read 0 records\n");
+ return -EINVAL;
+ } else if (num > control->ras_num_recs) {
+ dev_err(adev->dev, "too many records to read:%d available:%d\n",
+ num, control->ras_num_recs);
+ return -EINVAL;
+ }
+
+ buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
- mutex_lock(&control->tbl_mutex);
+ /* Determine how many records to read, from the first record
+ * index, fri, to the end of the table, and from the beginning
+ * of the table, such that the total number of records is
+ * @num, and we handle wrap around when fri > 0 and
+ * fri + num > RAS_MAX_RECORD_COUNT.
+ *
+ * First we compute the index of the last element
+ * which would be fetched from each region,
+ * g0 is in [fri, fri + num - 1], and
+ * g1 is in [0, RAS_MAX_RECORD_COUNT - 1].
+ * Then, if g0 < RAS_MAX_RECORD_COUNT, the index of
+ * the last element to fetch, we set g0 to _the number_
+ * of elements to fetch, @num, since we know that the last
+ * indexed to be fetched does not exceed the table.
+ *
+ * If, however, g0 >= RAS_MAX_RECORD_COUNT, then
+ * we set g0 to the number of elements to read
+ * until the end of the table, and g1 to the number of
+ * elements to read from the beginning of the table.
+ */
+ g0 = control->ras_fri + num - 1;
+ g1 = g0 % control->ras_max_record_count;
+ if (g0 < control->ras_max_record_count) {
+ g0 = num;
+ g1 = 0;
+ } else {
+ g0 = control->ras_max_record_count - control->ras_fri;
+ g1 += 1;
+ }
- msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL);
- if (!msgs) {
- ret = -ENOMEM;
- goto free_buff;
+ mutex_lock(&control->ras_tbl_mutex);
+ res = __amdgpu_ras_eeprom_read(control, buf, control->ras_fri, g0);
+ if (res)
+ goto Out;
+ if (g1) {
+ res = __amdgpu_ras_eeprom_read(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
}
- /*
- * If saved bad pages number exceeds the bad page threshold for
- * the whole VRAM, update table header to mark the BAD GPU tag
- * and schedule one ras recovery after eeprom write is done,
- * this can avoid the missing for latest records.
- *
- * This new header will be picked up and checked in the bootup
- * by ras recovery, which may break bootup process to notify
- * user this GPU is in bad state and to retire such GPU for
- * further check.
+ res = 0;
+
+ /* Read up everything? Then transform.
*/
- if (write && (amdgpu_bad_page_threshold != 0) &&
- ((control->num_recs + num) >= ras->bad_page_cnt_threshold)) {
- dev_warn(adev->dev,
- "Saved bad pages(%d) reaches threshold value(%d).\n",
- control->num_recs + num, ras->bad_page_cnt_threshold);
- control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD;
+ pp = buf;
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
+ __decode_table_record_from_buf(control, &record[i], pp);
+
+ /* update bad channel bitmap */
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ con->update_channel_flag = true;
+ }
}
+Out:
+ kfree(buf);
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
+{
+ /* get available eeprom table version first before eeprom table init */
+ amdgpu_ras_set_eeprom_table_version(control);
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ return RAS_MAX_RECORD_COUNT_V2_1;
+ else
+ return RAS_MAX_RECORD_COUNT;
+}
+
+static ssize_t
+amdgpu_ras_debugfs_eeprom_size_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
+ u8 data[50];
+ int res;
+
+ if (!size)
+ return size;
+
+ if (!ras || !control) {
+ res = snprintf(data, sizeof(data), "Not supported\n");
+ } else {
+ res = snprintf(data, sizeof(data), "%d bytes or %d records\n",
+ RAS_TBL_SIZE_BYTES, control->ras_max_record_count);
+ }
+
+ if (*pos >= res)
+ return 0;
+
+ res -= *pos;
+ res = min_t(size_t, res, size);
- /* In case of overflow just start from beginning to not lose newest records */
- if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES))
- control->next_addr = EEPROM_RECORD_START;
+ if (copy_to_user(buf, &data[*pos], res))
+ return -EFAULT;
- /*
- * TODO Currently makes EEPROM writes for each record, this creates
- * internal fragmentation. Optimized the code to do full page write of
- * 256b
+ *pos += res;
+
+ return res;
+}
+
+const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ras_debugfs_eeprom_size_read,
+ .write = NULL,
+ .llseek = default_llseek,
+};
+
+static const char *tbl_hdr_str = " Signature Version FirstOffs Size Checksum\n";
+static const char *tbl_hdr_fmt = "0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\n";
+#define tbl_hdr_fmt_size (5 * (2+8) + 4 + 1)
+static const char *rec_hdr_str = "Index Offset ErrType Bank/CU TimeStamp Offs/Addr MemChl MCUMCID RetiredPage\n";
+static const char *rec_hdr_fmt = "%5d 0x%05X %7s 0x%02X 0x%016llX 0x%012llX 0x%02X 0x%02X 0x%012llX\n";
+#define rec_hdr_fmt_size (5 + 1 + 7 + 1 + 7 + 1 + 7 + 1 + 18 + 1 + 14 + 1 + 6 + 1 + 7 + 1 + 14 + 1)
+
+static const char *record_err_type_str[AMDGPU_RAS_EEPROM_ERR_COUNT] = {
+ "ignore",
+ "re",
+ "ue",
+};
+
+static loff_t amdgpu_ras_debugfs_table_size(struct amdgpu_ras_eeprom_control *control)
+{
+ return strlen(tbl_hdr_str) + tbl_hdr_fmt_size +
+ strlen(rec_hdr_str) + rec_hdr_fmt_size * control->ras_num_recs;
+}
+
+void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_ras *ras = container_of(control, struct amdgpu_ras,
+ eeprom_control);
+ struct dentry *de = ras->de_ras_eeprom_table;
+
+ if (de)
+ d_inode(de)->i_size = amdgpu_ras_debugfs_table_size(control);
+}
+
+static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = &ras->eeprom_control;
+ const size_t orig_size = size;
+ int res = -EFAULT;
+ size_t data_len;
+
+ /* pmfw manages eeprom data by itself */
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ /* We want *pos - data_len > 0, which means there's
+ * bytes to be printed from data.
*/
- for (i = 0; i < num; i++) {
- buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
- record = &records[i];
- msg = &msgs[i];
+ data_len = strlen(tbl_hdr_str);
+ if (*pos < data_len) {
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ if (copy_to_user(buf, &tbl_hdr_str[*pos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
+
+ data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size;
+ if (*pos < data_len && size > 0) {
+ u8 data[tbl_hdr_fmt_size + 1];
+ loff_t lpos;
+
+ snprintf(data, sizeof(data), tbl_hdr_fmt,
+ control->tbl_hdr.header,
+ control->tbl_hdr.version,
+ control->tbl_hdr.first_rec_offset,
+ control->tbl_hdr.tbl_size,
+ control->tbl_hdr.checksum);
+
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ lpos = *pos - strlen(tbl_hdr_str);
+ if (copy_to_user(buf, &data[lpos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
+
+ data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size + strlen(rec_hdr_str);
+ if (*pos < data_len && size > 0) {
+ loff_t lpos;
+
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ lpos = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size;
+ if (copy_to_user(buf, &rec_hdr_str[lpos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
- control->next_addr = __correct_eeprom_dest_address(control->next_addr);
+ data_len = amdgpu_ras_debugfs_table_size(control);
+ if (*pos < data_len && size > 0) {
+ u8 dare[RAS_TABLE_RECORD_SIZE];
+ u8 data[rec_hdr_fmt_size + 1];
+ struct eeprom_table_record record;
+ int s, r;
- /*
- * Update bits 16,17 of EEPROM address in I2C address by setting them
- * to bits 1,2 of Device address byte
+ /* Find the starting record index
*/
- msg->addr = control->i2c_address |
- ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
- msg->flags = write ? 0 : I2C_M_RD;
- msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
- msg->buf = buff;
-
- /* Insert the EEPROM dest addess, bits 0-15 */
- buff[0] = ((control->next_addr >> 8) & 0xff);
- buff[1] = (control->next_addr & 0xff);
-
- /* EEPROM table content is stored in LE format */
- if (write)
- __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
-
- /*
- * The destination EEPROM address might need to be corrected to account
- * for page or entire memory wrapping
+ s = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
+ strlen(rec_hdr_str);
+ s = s / rec_hdr_fmt_size;
+ r = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
+ strlen(rec_hdr_str);
+ r = r % rec_hdr_fmt_size;
+
+ for ( ; size > 0 && s < control->ras_num_recs; s++) {
+ u32 ai = RAS_RI_TO_AI(control, s);
+ /* Read a single record
+ */
+ res = __amdgpu_ras_eeprom_read(control, dare, ai, 1);
+ if (res)
+ goto Out;
+ __decode_table_record_from_buf(control, &record, dare);
+ snprintf(data, sizeof(data), rec_hdr_fmt,
+ s,
+ RAS_INDEX_TO_OFFSET(control, ai),
+ record_err_type_str[record.err_type],
+ record.bank,
+ record.ts,
+ record.offset,
+ record.mem_channel,
+ record.mcumc_id,
+ record.retired_page);
+
+ data_len = min_t(size_t, rec_hdr_fmt_size - r, size);
+ if (copy_to_user(buf, &data[r], data_len)) {
+ res = -EFAULT;
+ goto Out;
+ }
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ r = 0;
+ }
+ }
+ res = 0;
+Out:
+ mutex_unlock(&control->ras_tbl_mutex);
+ return res < 0 ? res : orig_size - size;
+}
+
+static ssize_t
+amdgpu_ras_debugfs_eeprom_table_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
+ u8 data[81];
+ int res;
+
+ if (!size)
+ return size;
+
+ if (!ras || !control) {
+ res = snprintf(data, sizeof(data), "Not supported\n");
+ if (*pos >= res)
+ return 0;
+
+ res -= *pos;
+ res = min_t(size_t, res, size);
+
+ if (copy_to_user(buf, &data[*pos], res))
+ return -EFAULT;
+
+ *pos += res;
+
+ return res;
+ } else {
+ return amdgpu_ras_debugfs_table_read(f, buf, size, pos);
+ }
+}
+
+const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ras_debugfs_eeprom_table_read,
+ .write = NULL,
+ .llseek = default_llseek,
+};
+
+/**
+ * __verify_ras_table_checksum -- verify the RAS EEPROM table checksum
+ * @control: pointer to control structure
+ *
+ * Check the checksum of the stored in EEPROM RAS table.
+ *
+ * Return 0 if the checksum is correct,
+ * positive if it is not correct, and
+ * -errno on I/O error.
+ */
+static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int buf_size, res;
+ u8 csum, *buf, *pp;
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+
+ buf = kzalloc(buf_size, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "Out of memory checking RAS table checksum.\n");
+ return -ENOMEM;
+ }
+
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_header_offset,
+ buf, buf_size);
+ if (res < buf_size) {
+ dev_err(adev->dev, "Partial read for checksum, res:%d\n", res);
+ /* On partial reads, return -EIO.
*/
- control->next_addr += EEPROM_TABLE_RECORD_SIZE;
+ if (res >= 0)
+ res = -EIO;
+ goto Out;
}
- /* i2c may be unstable in gpu reset */
- down_read(&adev->reset_sem);
- ret = i2c_transfer(&adev->pm.smu_i2c, msgs, num);
- up_read(&adev->reset_sem);
+ csum = 0;
+ for (pp = buf; pp < buf + buf_size; pp++)
+ csum += *pp;
+Out:
+ kfree(buf);
+ return res < 0 ? res : csum;
+}
+
+static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char *buf;
+ int res;
+
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "Failed to alloc buf to read EEPROM table ras info\n");
+ return -ENOMEM;
+ }
+
+ /**
+ * EEPROM table V2_1 supports ras info,
+ * read EEPROM table ras info
+ */
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address + control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ dev_err(adev->dev,
+ "Failed to read EEPROM table ras info, res:%d", res);
+ res = res >= 0 ? -EIO : res;
+ goto Out;
+ }
+
+ __decode_table_ras_info_from_buf(rai, buf);
+
+Out:
+ kfree(buf);
+ return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
+}
+
+static int amdgpu_ras_smu_eeprom_init(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ uint64_t local_time;
+ int res;
+
+ ras->is_rma = false;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+ mutex_init(&control->ras_tbl_mutex);
+
+ res = amdgpu_ras_smu_get_table_version(adev, &(hdr->version));
+ if (res)
+ return res;
+
+ res = amdgpu_ras_smu_get_badpage_count(adev,
+ &(control->ras_num_recs), 100);
+ if (res)
+ return res;
+
+ local_time = (uint64_t)ktime_get_real_seconds();
+ res = amdgpu_ras_smu_set_timestamp(adev, local_time);
+ if (res)
+ return res;
+
+ control->ras_max_record_count = 4000;
+
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+
+ return 0;
+}
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res;
+
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_init(control);
+
+ ras->is_rma = false;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
- if (ret < 1) {
- DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret);
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
- /* TODO Restore prev next EEPROM address ? */
- goto free_msgs;
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ control->ras_header_offset = RAS_HDR_START;
+ control->ras_info_offset = RAS_TABLE_V2_1_INFO_START;
+ mutex_init(&control->ras_tbl_mutex);
+
+ /* Read the table header from EEPROM address */
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address + control->ras_header_offset,
+ buf, RAS_TABLE_HEADER_SIZE);
+ if (res < RAS_TABLE_HEADER_SIZE) {
+ dev_err(adev->dev, "Failed to read EEPROM table header, res:%d",
+ res);
+ return res >= 0 ? -EIO : res;
}
+ __decode_table_header_from_buf(hdr, buf);
- if (!write) {
- for (i = 0; i < num; i++) {
- buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
- record = &records[i];
+ if (hdr->header != RAS_TABLE_HDR_VAL &&
+ hdr->header != RAS_TABLE_HDR_BAD) {
+ dev_info(adev->dev, "Creating a new EEPROM table");
+ return amdgpu_ras_eeprom_reset_table(control);
+ }
- __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
+ switch (hdr->version) {
+ case RAS_TABLE_VER_V2_1:
+ case RAS_TABLE_VER_V3:
+ control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ break;
+ case RAS_TABLE_VER_V1:
+ control->ras_num_recs = RAS_NUM_RECS(hdr);
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ break;
+ default:
+ dev_err(adev->dev,
+ "RAS header invalid, unsupported version: %u",
+ hdr->version);
+ return -EINVAL;
+ }
+
+ if (control->ras_num_recs > control->ras_max_record_count) {
+ dev_err(adev->dev,
+ "RAS header invalid, records in header: %u max allowed :%u",
+ control->ras_num_recs, control->ras_max_record_count);
+ return -EINVAL;
+ }
+
+ control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+ return 0;
+}
+
+static int amdgpu_ras_smu_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ control->ras_num_bad_pages = ras->bad_page_num;
+
+ if ((ras->bad_page_cnt_threshold < control->ras_num_bad_pages) &&
+ amdgpu_bad_page_threshold != 0) {
+ dev_warn(adev->dev,
+ "RAS records:%d exceed threshold:%d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
+ } else {
+ ras->is_rma = true;
+ dev_warn(adev->dev,
+ "User defined threshold is set, runtime service will be halt when threshold is reached\n");
}
+
+ return 0;
}
- if (write) {
- uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control);
+ dev_dbg(adev->dev,
+ "Found existing EEPROM table with %d records",
+ control->ras_num_bad_pages);
- /*
- * Update table header with size and CRC and account for table
- * wrap around where the assumption is that we treat it as empty
- * table
- *
- * TODO - Check the assumption is correct
+ /* Warn if we are at 90% of the threshold or above
+ */
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
+ control->ras_num_bad_pages,
+ ras->bad_page_cnt_threshold);
+ return 0;
+}
+
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res = 0;
+
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_check(control);
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
+
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ control->ras_num_bad_pages = ras->bad_page_num;
+
+ if (hdr->header == RAS_TABLE_HDR_VAL) {
+ dev_dbg(adev->dev,
+ "Found existing EEPROM table with %d records",
+ control->ras_num_bad_pages);
+
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
+ res = __verify_ras_table_checksum(control);
+ if (res)
+ dev_err(adev->dev,
+ "RAS table incorrect checksum or error:%d\n",
+ res);
+
+ /* Warn if we are at 90% of the threshold or above
*/
- control->num_recs += num;
- control->num_recs %= EEPROM_MAX_RECORD_NUM;
- control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num;
- if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES)
- control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE +
- control->num_recs * EEPROM_TABLE_RECORD_SIZE;
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
+ control->ras_num_bad_pages,
+ ras->bad_page_cnt_threshold);
+ } else if (hdr->header == RAS_TABLE_HDR_BAD &&
+ amdgpu_bad_page_threshold != 0) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
+ res = __verify_ras_table_checksum(control);
+ if (res) {
+ dev_err(adev->dev,
+ "RAS Table incorrect checksum or error:%d\n",
+ res);
+ return -EINVAL;
+ }
+ if (ras->bad_page_cnt_threshold >= control->ras_num_bad_pages) {
+ /* This means that, the threshold was increased since
+ * the last time the system was booted, and now,
+ * ras->bad_page_cnt_threshold - control->num_recs > 0,
+ * so that at least one more record can be saved,
+ * before the page count threshold is reached.
+ */
+ dev_info(adev->dev,
+ "records:%d threshold:%d, resetting "
+ "RAS table header signature",
+ control->ras_num_bad_pages,
+ ras->bad_page_cnt_threshold);
+ res = amdgpu_ras_eeprom_correct_header_tag(control,
+ RAS_TABLE_HDR_VAL);
+ } else {
+ dev_warn(adev->dev,
+ "RAS records:%d exceed threshold:%d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
+ res = 0;
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
+ } else {
+ ras->is_rma = true;
+ dev_warn(adev->dev,
+ "User defined threshold is set, runtime service will be halt when threshold is reached\n");
+ }
+ }
+ }
- __update_tbl_checksum(control, records, num, old_hdr_byte_sum);
+ return res < 0 ? res : 0;
+}
- __update_table_header(control, buffs);
- } else if (!__validate_tbl_checksum(control, records, num)) {
- DRM_WARN("EEPROM Table checksum mismatch!");
- /* TODO Uncomment when EEPROM read/write is relliable */
- /* ret = -EIO; */
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control;
+ int res;
+
+ if (!__is_ras_eeprom_supported(adev) || !ras ||
+ amdgpu_ras_smu_eeprom_supported(adev))
+ return;
+ control = &ras->eeprom_control;
+ if (!control->is_eeprom_valid)
+ return;
+ res = __verify_ras_table_checksum(control);
+ if (res) {
+ dev_warn(adev->dev,
+ "RAS table incorrect checksum or error:%d, try to recover\n",
+ res);
+ if (!amdgpu_ras_eeprom_reset_table(control))
+ if (!amdgpu_ras_save_bad_pages(adev, NULL))
+ if (!__verify_ras_table_checksum(control)) {
+ dev_info(adev->dev, "RAS table recovery succeed\n");
+ return;
+ }
+ dev_err(adev->dev, "RAS table recovery failed\n");
+ control->is_eeprom_valid = false;
}
+ return;
+}
+
+static const struct ras_smu_drv *amdgpu_ras_get_smu_ras_drv(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!ras)
+ return NULL;
+
+ return ras->ras_smu_drv;
+}
+
+static uint64_t amdgpu_ras_smu_get_feature_flags(struct amdgpu_device *adev)
+{
+ const struct ras_smu_drv *ras_smu_drv = amdgpu_ras_get_smu_ras_drv(adev);
+ uint64_t flags = 0ULL;
+
+ if (!ras_smu_drv)
+ goto out;
+
+ if (ras_smu_drv->ras_smu_feature_flags)
+ ras_smu_drv->ras_smu_feature_flags(adev, &flags);
+
+out:
+ return flags;
+}
-free_msgs:
- kfree(msgs);
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+ uint64_t flags = 0ULL;
+
+ if (!__is_ras_eeprom_supported(adev) || !smu_ras_drv)
+ return false;
-free_buff:
- kfree(buffs);
+ if (!smu_ras_drv->smu_eeprom_funcs)
+ return false;
- mutex_unlock(&control->tbl_mutex);
+ flags = amdgpu_ras_smu_get_feature_flags(adev);
- return ret == num ? 0 : -EIO;
+ return !!(flags & RAS_SMU_FEATURE_BIT__RAS_EEPROM);
}
-inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void)
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+ uint32_t *table_version)
{
- return EEPROM_MAX_RECORD_NUM;
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_ras_table_version)
+ return smu_ras_drv->smu_eeprom_funcs->get_ras_table_version(adev,
+ table_version);
+ return -EOPNOTSUPP;
}
-/* Used for testing if bugs encountered */
-#if 0
-void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control)
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+ uint32_t *count, uint32_t timeout)
{
- int i;
- struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL);
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
- if (!recs)
- return;
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
- for (i = 0; i < 1 ; i++) {
- recs[i].address = 0xdeadbeef;
- recs[i].retired_page = i;
- }
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_count)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_count(adev,
+ count, timeout);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *mca_addr)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_mca_addr)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_mca_addr(adev,
+ index, mca_addr);
+ return -EOPNOTSUPP;
+}
- if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) {
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+ uint64_t timestamp)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->set_timestamp)
+ return smu_ras_drv->smu_eeprom_funcs->set_timestamp(adev,
+ timestamp);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
- memset(recs, 0, sizeof(*recs) * 1);
+ if (smu_ras_drv->smu_eeprom_funcs->get_timestamp)
+ return smu_ras_drv->smu_eeprom_funcs->get_timestamp(adev,
+ index, timestamp);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *ipid)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
- control->next_addr = EEPROM_RECORD_START;
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_ipid)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_ipid(adev,
+ index, ipid);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+ uint32_t *result)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
- if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) {
- for (i = 0; i < 1; i++)
- DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu",
- recs[i].address, recs[i].retired_page);
- } else
- DRM_ERROR("Failed in reading from table");
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
- } else
- DRM_ERROR("Failed in writing to table");
+ if (smu_ras_drv->smu_eeprom_funcs->erase_ras_table)
+ return smu_ras_drv->smu_eeprom_funcs->erase_ras_table(adev,
+ result);
+ return -EOPNOTSUPP;
}
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index c7a5e5c7c61e..2e5d63957e71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -26,12 +26,22 @@
#include <linux/i2c.h>
+#define RAS_TABLE_VER_V1 0x00010000
+#define RAS_TABLE_VER_V2_1 0x00021000
+#define RAS_TABLE_VER_V3 0x00030000
+
struct amdgpu_device;
-enum amdgpu_ras_eeprom_err_type{
- AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER,
+enum amdgpu_ras_gpu_health_status {
+ GPU_HEALTH_USABLE = 0,
+ GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
+};
+
+enum amdgpu_ras_eeprom_err_type {
+ AMDGPU_RAS_EEPROM_ERR_NA,
AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
- AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE
+ AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE,
+ AMDGPU_RAS_EEPROM_ERR_COUNT,
};
struct amdgpu_ras_eeprom_table_header {
@@ -40,15 +50,73 @@ struct amdgpu_ras_eeprom_table_header {
uint32_t first_rec_offset;
uint32_t tbl_size;
uint32_t checksum;
-}__attribute__((__packed__));
+} __packed;
+
+struct amdgpu_ras_eeprom_table_ras_info {
+ u8 rma_status;
+ u8 health_percent;
+ u16 ecc_page_threshold;
+ u32 padding[64 - 1];
+} __packed;
struct amdgpu_ras_eeprom_control {
struct amdgpu_ras_eeprom_table_header tbl_hdr;
- uint32_t next_addr;
- unsigned int num_recs;
- struct mutex tbl_mutex;
- uint32_t tbl_byte_sum;
- uint16_t i2c_address; // 8-bit represented address
+
+ struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
+
+ /* Base I2C EEPPROM 19-bit memory address,
+ * where the table is located. For more information,
+ * see top of amdgpu_eeprom.c.
+ */
+ u32 i2c_address;
+
+ /* The byte offset off of @i2c_address
+ * where the table header is found,
+ * and where the records start--always
+ * right after the header.
+ */
+ u32 ras_header_offset;
+ u32 ras_info_offset;
+ u32 ras_record_offset;
+
+ /* Number of records in the table.
+ */
+ u32 ras_num_recs;
+ u32 ras_num_recs_old;
+
+ /* the bad page number is ras_num_recs or
+ * ras_num_recs * umc.retire_unit
+ */
+ u32 ras_num_bad_pages;
+
+ /* Number of records store mca address */
+ u32 ras_num_mca_recs;
+
+ /* Number of records store physical address */
+ u32 ras_num_pa_recs;
+
+ /* First record index to read, 0-based.
+ * Range is [0, num_recs-1]. This is
+ * an absolute index, starting right after
+ * the table header.
+ */
+ u32 ras_fri;
+
+ /* Maximum possible number of records
+ * we could store, i.e. the maximum capacity
+ * of the table.
+ */
+ u32 ras_max_record_count;
+
+ /* Protect table access via this mutex.
+ */
+ struct mutex ras_tbl_mutex;
+
+ /* Record channel info which occurred bad pages
+ */
+ u32 bad_channel_bitmap;
+
+ bool is_eeprom_valid;
};
/*
@@ -74,23 +142,58 @@ struct eeprom_table_record {
unsigned char mem_channel;
unsigned char mcumc_id;
-}__attribute__((__packed__));
+} __packed;
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
-int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit);
int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
-int amdgpu_ras_eeprom_check_err_threshold(
- struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit);
+bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev);
+
+int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, const u32 num);
+
+int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, const u32 num);
+
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
+
+void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
+
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
+
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev);
+
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev);
+
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+ uint32_t *table_version);
+
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+ uint32_t *count, uint32_t timeout);
+
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *mca_addr);
+
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+ uint64_t timestamp);
+
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *ipid);
+
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+ uint32_t *result);
-int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records,
- bool write,
- int num);
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num);
-inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void);
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control);
-void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control);
+extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
+extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
#endif // _AMDGPU_RAS_EEPROM_H
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
new file mode 100644
index 000000000000..be2e56ce1355
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#ifndef __AMDGPU_RES_CURSOR_H__
+#define __AMDGPU_RES_CURSOR_H__
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "amdgpu_vram_mgr.h"
+
+/* state back for walking over vram_mgr and gtt_mgr allocations */
+struct amdgpu_res_cursor {
+ uint64_t start;
+ uint64_t size;
+ uint64_t remaining;
+ void *node;
+ uint32_t mem_type;
+};
+
+/**
+ * amdgpu_res_first - initialize a amdgpu_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations between @start and @size.
+ */
+static inline void amdgpu_res_first(struct ttm_resource *res,
+ uint64_t start, uint64_t size,
+ struct amdgpu_res_cursor *cur)
+{
+ struct drm_buddy_block *block;
+ struct list_head *head, *next;
+ struct drm_mm_node *node;
+
+ if (!res)
+ goto fallback;
+
+ BUG_ON(start + size > res->size);
+
+ cur->mem_type = res->mem_type;
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ head = &to_amdgpu_vram_mgr_resource(res)->blocks;
+
+ block = list_first_entry_or_null(head,
+ struct drm_buddy_block,
+ link);
+ if (!block)
+ goto fallback;
+
+ while (start >= amdgpu_vram_mgr_block_size(block)) {
+ start -= amdgpu_vram_mgr_block_size(block);
+
+ next = block->link.next;
+ if (next != head)
+ block = list_entry(next, struct drm_buddy_block, link);
+ }
+
+ cur->start = amdgpu_vram_mgr_block_start(block) + start;
+ cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size);
+ cur->remaining = size;
+ cur->node = block;
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
+ node = to_ttm_range_mgr_node(res)->mm_nodes;
+ while (start >= node->size << PAGE_SHIFT)
+ start -= node++->size << PAGE_SHIFT;
+
+ cur->start = (node->start << PAGE_SHIFT) + start;
+ cur->size = min((node->size << PAGE_SHIFT) - start, size);
+ cur->remaining = size;
+ cur->node = node;
+ break;
+ default:
+ goto fallback;
+ }
+
+ return;
+
+fallback:
+ cur->start = start;
+ cur->size = size;
+ cur->remaining = size;
+ cur->node = NULL;
+ WARN_ON(res && start + size > res->size);
+}
+
+/**
+ * amdgpu_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forwrad, walking to the next node if necessary.
+ */
+static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
+{
+ struct drm_buddy_block *block;
+ struct drm_mm_node *node;
+ struct list_head *next;
+
+ BUG_ON(size > cur->remaining);
+
+ cur->remaining -= size;
+ if (!cur->remaining)
+ return;
+
+ cur->size -= size;
+ if (cur->size) {
+ cur->start += size;
+ return;
+ }
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ block = cur->node;
+
+ next = block->link.next;
+ block = list_entry(next, struct drm_buddy_block, link);
+
+ cur->node = block;
+ cur->start = amdgpu_vram_mgr_block_start(block);
+ cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
+ node = cur->node;
+
+ cur->node = ++node;
+ cur->start = node->start << PAGE_SHIFT;
+ cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+ break;
+ default:
+ return;
+ }
+}
+
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+ struct drm_buddy_block *block;
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ block = cur->node;
+
+ if (!amdgpu_vram_mgr_is_cleared(block))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
new file mode 100644
index 000000000000..28c4ad62f50e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_reset.h"
+#include "aldebaran.h"
+#include "sienna_cichlid.h"
+#include "smu_v13_0_10.h"
+
+static int amdgpu_reset_xgmi_reset_on_init_suspend(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ /* displays are handled in phase1 */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+
+ /* XXX handle errors */
+ amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ /* VCN FW shared region is in frambuffer, there are some flags
+ * initialized in that region during sw_init. Make sure the region is
+ * backed up.
+ */
+ amdgpu_vcn_save_vcpu_bo(adev);
+
+ return 0;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_prep_hwctxt(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev;
+ int r;
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ amdgpu_unregister_gpu_instance(tmp_adev);
+ r = amdgpu_reset_xgmi_reset_on_init_suspend(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "xgmi reset on init: prepare for reset failed");
+ return r;
+ }
+ }
+
+ return r;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_restore_hwctxt(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r;
+
+ r = amdgpu_device_reinit_after_reset(reset_context);
+ if (r)
+ return r;
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ if (!tmp_adev->kfd.init_complete) {
+ kgd2kfd_init_zone_device(tmp_adev);
+ amdgpu_amdkfd_device_init(tmp_adev);
+ amdgpu_amdkfd_drm_client_create(tmp_adev);
+ }
+ }
+
+ return r;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_perform_reset(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r;
+
+ dev_dbg(adev->dev, "xgmi roi - hw reset\n");
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset =
+ amdgpu_asic_reset_method(adev);
+ }
+ r = 0;
+ /* Mode1 reset needs to be triggered on all devices together */
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "xgmi reset on init: reset failed with error, %d",
+ r);
+ break;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+ }
+
+ return r;
+}
+
+int amdgpu_reset_do_xgmi_reset_on_init(
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *adev;
+ int r;
+
+ if (!reset_device_list || list_empty(reset_device_list) ||
+ list_is_singular(reset_device_list))
+ return -EINVAL;
+
+ adev = list_first_entry(reset_device_list, struct amdgpu_device,
+ reset_list);
+ r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
+ if (r)
+ return r;
+
+ r = amdgpu_reset_perform_reset(adev, reset_context);
+
+ return r;
+}
+
+struct amdgpu_reset_handler xgmi_reset_on_init_handler = {
+ .reset_method = AMD_RESET_METHOD_ON_INIT,
+ .prepare_env = NULL,
+ .prepare_hwcontext = amdgpu_reset_xgmi_reset_on_init_prep_hwctxt,
+ .perform_reset = amdgpu_reset_xgmi_reset_on_init_perform_reset,
+ .restore_hwcontext = amdgpu_reset_xgmi_reset_on_init_restore_hwctxt,
+ .restore_env = NULL,
+ .do_reset = NULL,
+};
+
+int amdgpu_reset_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ ret = aldebaran_reset_init(adev);
+ break;
+ case IP_VERSION(11, 0, 7):
+ ret = sienna_cichlid_reset_init(adev);
+ break;
+ case IP_VERSION(13, 0, 10):
+ ret = smu_v13_0_10_reset_init(adev);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_reset_fini(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ ret = aldebaran_reset_fini(adev);
+ break;
+ case IP_VERSION(11, 0, 7):
+ ret = sienna_cichlid_reset_fini(adev);
+ break;
+ case IP_VERSION(13, 0, 10):
+ ret = smu_v13_0_10_reset_fini(adev);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *reset_handler = NULL;
+
+ if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
+ reset_handler = adev->reset_cntl->get_reset_handler(
+ adev->reset_cntl, reset_context);
+ if (!reset_handler)
+ return -EOPNOTSUPP;
+
+ return reset_handler->prepare_hwcontext(adev->reset_cntl,
+ reset_context);
+}
+
+int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ int ret;
+ struct amdgpu_reset_handler *reset_handler = NULL;
+
+ if (adev->reset_cntl)
+ reset_handler = adev->reset_cntl->get_reset_handler(
+ adev->reset_cntl, reset_context);
+ if (!reset_handler)
+ return -EOPNOTSUPP;
+
+ ret = reset_handler->perform_reset(adev->reset_cntl, reset_context);
+ if (ret)
+ return ret;
+
+ return reset_handler->restore_hwcontext(adev->reset_cntl,
+ reset_context);
+}
+
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref)
+{
+ struct amdgpu_reset_domain *reset_domain = container_of(ref,
+ struct amdgpu_reset_domain,
+ refcount);
+ if (reset_domain->wq)
+ destroy_workqueue(reset_domain->wq);
+
+ kvfree(reset_domain);
+}
+
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+ char *wq_name)
+{
+ struct amdgpu_reset_domain *reset_domain;
+
+ reset_domain = kvzalloc(sizeof(struct amdgpu_reset_domain), GFP_KERNEL);
+ if (!reset_domain) {
+ DRM_ERROR("Failed to allocate amdgpu_reset_domain!");
+ return NULL;
+ }
+
+ reset_domain->type = type;
+ kref_init(&reset_domain->refcount);
+
+ reset_domain->wq = create_singlethread_workqueue(wq_name);
+ if (!reset_domain->wq) {
+ DRM_ERROR("Failed to allocate wq for amdgpu_reset_domain!");
+ amdgpu_reset_put_reset_domain(reset_domain);
+ return NULL;
+
+ }
+
+ atomic_set(&reset_domain->in_gpu_reset, 0);
+ atomic_set(&reset_domain->reset_res, 0);
+ init_rwsem(&reset_domain->sem);
+
+ return reset_domain;
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+ atomic_set(&reset_domain->in_gpu_reset, 1);
+ down_write(&reset_domain->sem);
+}
+
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+ atomic_set(&reset_domain->in_gpu_reset, 0);
+ up_write(&reset_domain->sem);
+}
+
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len)
+{
+ if (!buf || !len)
+ return;
+
+ switch (rst_ctxt->src) {
+ case AMDGPU_RESET_SRC_JOB:
+ if (rst_ctxt->job) {
+ snprintf(buf, len, "job hang on ring:%s",
+ rst_ctxt->job->base.sched->name);
+ } else {
+ strscpy(buf, "job hang", len);
+ }
+ break;
+ case AMDGPU_RESET_SRC_RAS:
+ strscpy(buf, "RAS error", len);
+ break;
+ case AMDGPU_RESET_SRC_MES:
+ strscpy(buf, "MES hang", len);
+ break;
+ case AMDGPU_RESET_SRC_HWS:
+ strscpy(buf, "HWS hang", len);
+ break;
+ case AMDGPU_RESET_SRC_USER:
+ strscpy(buf, "user trigger", len);
+ break;
+ case AMDGPU_RESET_SRC_USERQ:
+ strscpy(buf, "user queue trigger", len);
+ break;
+ default:
+ strscpy(buf, "unknown", len);
+ }
+}
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev)
+{
+ return (adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
new file mode 100644
index 000000000000..07b4d37f1db6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RESET_H__
+#define __AMDGPU_RESET_H__
+
+#include "amdgpu.h"
+
+#define AMDGPU_RESET_MAX_HANDLERS 5
+
+enum AMDGPU_RESET_FLAGS {
+
+ AMDGPU_NEED_FULL_RESET = 0,
+ AMDGPU_SKIP_HW_RESET = 1,
+ AMDGPU_SKIP_COREDUMP = 2,
+ AMDGPU_HOST_FLR = 3,
+};
+
+enum AMDGPU_RESET_SRCS {
+ AMDGPU_RESET_SRC_UNKNOWN,
+ AMDGPU_RESET_SRC_JOB,
+ AMDGPU_RESET_SRC_RAS,
+ AMDGPU_RESET_SRC_MES,
+ AMDGPU_RESET_SRC_HWS,
+ AMDGPU_RESET_SRC_USER,
+ AMDGPU_RESET_SRC_USERQ,
+};
+
+struct amdgpu_reset_context {
+ enum amd_reset_method method;
+ struct amdgpu_device *reset_req_dev;
+ struct amdgpu_job *job;
+ struct amdgpu_hive_info *hive;
+ struct list_head *reset_device_list;
+ unsigned long flags;
+ enum AMDGPU_RESET_SRCS src;
+};
+
+struct amdgpu_reset_handler {
+ enum amd_reset_method reset_method;
+ int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*perform_reset)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*restore_hwcontext)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*restore_env)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+
+ int (*do_reset)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_reset_control {
+ void *handle;
+ struct work_struct reset_work;
+ struct mutex reset_lock;
+ struct amdgpu_reset_handler *(
+ *reset_handlers)[AMDGPU_RESET_MAX_HANDLERS];
+ atomic_t in_reset;
+ enum amd_reset_method active_reset;
+ struct amdgpu_reset_handler *(*get_reset_handler)(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ void (*async_reset)(struct work_struct *work);
+};
+
+
+enum amdgpu_reset_domain_type {
+ SINGLE_DEVICE,
+ XGMI_HIVE
+};
+
+struct amdgpu_reset_domain {
+ struct kref refcount;
+ struct workqueue_struct *wq;
+ enum amdgpu_reset_domain_type type;
+ struct rw_semaphore sem;
+ atomic_t in_gpu_reset;
+ atomic_t reset_res;
+};
+
+int amdgpu_reset_init(struct amdgpu_device *adev);
+int amdgpu_reset_fini(struct amdgpu_device *adev);
+
+int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_reset_prepare_env(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+int amdgpu_reset_restore_env(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+ char *wq_name);
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref);
+
+static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *domain)
+{
+ return kref_get_unless_zero(&domain->refcount) != 0;
+}
+
+static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain)
+{
+ if (domain)
+ kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
+}
+
+static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain,
+ struct work_struct *work)
+{
+ return queue_work(domain->wq, work);
+}
+
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain)
+{
+ lockdep_assert_held(&domain->sem);
+ return rwsem_is_contended(&domain->sem);
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len);
+
+#define for_each_handler(i, handler, reset_ctl) \
+ for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
+ (handler = (*reset_ctl->reset_handlers)[i]); \
+ ++i)
+
+extern struct amdgpu_reset_handler xgmi_reset_on_init_handler;
+int amdgpu_reset_do_xgmi_reset_on_init(
+ struct amdgpu_reset_context *reset_context);
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev);
+
+static inline void amdgpu_reset_set_dpc_status(struct amdgpu_device *adev,
+ bool status)
+{
+ adev->pcie_reset_ctx.occurs_dpc = status;
+ adev->no_hw_access = status;
+}
+
+static inline bool amdgpu_reset_in_dpc(struct amdgpu_device *adev)
+{
+ return adev->pcie_reset_ctx.occurs_dpc;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index b644c78475fd..c596b6df2e2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#include "amdgpu_ras_mgr.h"
#include "atom.h"
/*
@@ -50,6 +51,26 @@
*/
/**
+ * amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
+ *
+ * @type: ring type for which to return the limit.
+ */
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
+{
+ switch (type) {
+ case AMDGPU_RING_TYPE_GFX:
+ /* Need to keep at least 192 on GFX7+ for old radv. */
+ return 192;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ return 125;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ return 16;
+ default:
+ return 49;
+ }
+}
+
+/**
* amdgpu_ring_alloc - allocate space on the ring buffer
*
* @ring: amdgpu_ring structure holding ring information
@@ -58,7 +79,7 @@
* Allocate @ndw dwords in the ring buffer (all asics).
* Returns 0 on success, error on failure.
*/
-int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
{
/* Align requested size with padding so unlock_commit can
* pad safely */
@@ -79,6 +100,29 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
return 0;
}
+/**
+ * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ndw: number of dwords to allocate in the ring buffer
+ *
+ * Allocate @ndw dwords in the ring buffer (all asics).
+ * doesn't check the max_dw limit as we may be reemitting
+ * several submissions.
+ */
+static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
+{
+ /* Align requested size with padding so unlock_commit can
+ * pad safely */
+ ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ ring->count_dw = ndw;
+ ring->wptr_old = ring->wptr;
+
+ if (ring->funcs->begin_use)
+ ring->funcs->begin_use(ring);
+}
+
/** amdgpu_ring_insert_nop - insert NOP packets
*
* @ring: amdgpu_ring structure holding ring information
@@ -88,10 +132,22 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
*/
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
+ uint32_t occupied, chunk1, chunk2;
+
+ occupied = ring->wptr & ring->buf_mask;
+ chunk1 = ring->buf_mask + 1 - occupied;
+ chunk1 = (chunk1 >= count) ? count : chunk1;
+ chunk2 = count - chunk1;
+
+ if (chunk1)
+ memset32(&ring->ring[occupied], ring->funcs->nop, chunk1);
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ if (chunk2)
+ memset32(ring->ring, ring->funcs->nop, chunk2);
+
+ ring->wptr += count;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw -= count;
}
/**
@@ -104,8 +160,16 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
*/
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
- while (ib->length_dw & ring->funcs->align_mask)
- ib->ptr[ib->length_dw++] = ring->funcs->nop;
+ u32 align_mask = ring->funcs->align_mask;
+ u32 count = ib->length_dw & align_mask;
+
+ if (count) {
+ count = align_mask + 1 - count;
+
+ memset32(&ib->ptr[ib->length_dw], ring->funcs->nop, count);
+
+ ib->length_dw += count;
+ }
}
/**
@@ -121,11 +185,16 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
uint32_t count;
+ if (ring->count_dw < 0)
+ DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
- count %= ring->funcs->align_mask + 1;
- ring->funcs->insert_nop(ring, count);
+ count &= ring->funcs->align_mask;
+
+ if (count != 0)
+ ring->funcs->insert_nop(ring, count);
mb();
amdgpu_ring_set_wptr(ring);
@@ -149,6 +218,12 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->funcs->end_use(ring);
}
+#define amdgpu_ring_get_gpu_addr(ring, offset) \
+ (ring->adev->wb.gpu_addr + offset * 4)
+
+#define amdgpu_ring_get_cpu_addr(ring, offset) \
+ (&ring->adev->wb.wb[offset])
+
/**
* amdgpu_ring_init - init driver ring struct.
*
@@ -158,18 +233,21 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
* @irq_src: interrupt source to use for this ring
* @irq_type: interrupt type to use for this ring
* @hw_prio: ring priority (NORMAL/HIGH)
+ * @sched_score: optional score atomic shared with other schedulers
*
* Initialize the driver information for the selected ring (all asics).
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned int max_dw, struct amdgpu_irq_src *irq_src,
- unsigned int irq_type, unsigned int hw_prio)
+ unsigned int irq_type, unsigned int hw_prio,
+ atomic_t *sched_score)
{
int r;
int sched_hw_submission = amdgpu_sched_hw_submission;
u32 *num_sched;
u32 hw_ip;
+ unsigned int max_ibs_dw;
/* Set the hw submission limit higher for KIQ because
* it's used for a number of gfx/compute tasks by both
@@ -179,6 +257,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
sched_hw_submission = max(sched_hw_submission, 256);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_MES)
+ sched_hw_submission = 8;
else if (ring == &adev->sdma.instance[0].page)
sched_hw_submission = 256;
@@ -187,9 +267,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return -EINVAL;
ring->adev = adev;
+ ring->num_hw_submission = sched_hw_submission;
+ ring->sched_score = sched_score;
+ ring->vmid_wait = dma_fence_get_stub();
+
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
- r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
+
+ r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;
}
@@ -214,44 +299,89 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
if (r) {
- dev_err(adev->dev,
- "(%d) ring trail_fence_offs wb alloc failed\n", r);
+ dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
return r;
}
- ring->trail_fence_gpu_addr =
- adev->wb.gpu_addr + (ring->trail_fence_offs * 4);
- ring->trail_fence_cpu_addr = &adev->wb.wb[ring->trail_fence_offs];
r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
if (r) {
dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
return r;
}
- ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4);
- ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs];
+
+ ring->fence_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
+ ring->fence_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
+
+ ring->rptr_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->rptr_offs);
+ ring->rptr_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->rptr_offs);
+
+ ring->wptr_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->wptr_offs);
+ ring->wptr_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->wptr_offs);
+
+ ring->trail_fence_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
+ ring->trail_fence_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
+
+ ring->cond_exe_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
+ ring->cond_exe_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
+
/* always set cond_exec_polling to CONTINUE */
*ring->cond_exe_cpu_addr = 1;
- r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
- if (r) {
- dev_err(adev->dev, "failed initializing fences (%d).\n", r);
- return r;
- }
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
+ if (r) {
+ dev_err(adev->dev, "failed initializing fences (%d).\n", r);
+ return r;
+ }
+
+ max_ibs_dw = ring->funcs->emit_frame_size +
+ amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
+ max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ if (WARN_ON(max_ibs_dw > max_dw))
+ max_dw = max_ibs_dw;
- ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ } else {
+ ring->ring_size = roundup_pow_of_two(max_dw * 4);
+ ring->count_dw = (ring->ring_size - 4) >> 2;
+ /* ring buffer is empty now */
+ ring->wptr = *ring->rptr_cpu_addr = 0;
+ }
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
0xffffffffffffffff : ring->buf_mask;
+ /* Initialize cached_rptr to 0 */
+ ring->cached_rptr = 0;
+
+ if (!ring->ring_backup) {
+ ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL);
+ if (!ring->ring_backup)
+ return -ENOMEM;
+ }
+
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
- r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_bytes,
+ PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
if (r) {
dev_err(adev->dev, "(%d) ring create failed\n", r);
+ kvfree(ring->ring_backup);
return r;
}
amdgpu_ring_clear_ring(ring);
@@ -260,7 +390,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->max_dw = max_dw;
ring->hw_prio = hw_prio;
- if (!ring->no_scheduler) {
+ if (!ring->no_scheduler && ring->funcs->type < AMDGPU_HW_IP_NUM) {
hw_ip = ring->funcs->type;
num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
@@ -295,12 +425,12 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
+ kvfree(ring->ring_backup);
+ ring->ring_backup = NULL;
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
ring->me = 0;
-
- ring->adev->rings[ring->idx] = NULL;
}
/**
@@ -335,17 +465,30 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
- ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+ unsigned long flags;
+ ktime_t deadline;
+ bool ret;
+
+ deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
- atomic_inc(&ring->adev->gpu_reset_counter);
+ spin_lock_irqsave(fence->lock, flags);
+ if (!dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, -ENODATA);
+ spin_unlock_irqrestore(fence->lock, flags);
+
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
ring->funcs->soft_recovery(ring, vmid);
- return dma_fence_is_signaled(fence);
+ ret = dma_fence_is_signaled(fence);
+ /* increment the counter only if soft reset worked */
+ if (ret)
+ atomic_inc(&ring->adev->gpu_reset_counter);
+
+ return ret;
}
/*
@@ -353,6 +496,66 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
*/
#if defined(CONFIG_DEBUG_FS)
+static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *offset)
+{
+ const uint8_t ring_header_size = 12;
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ struct ras_cmd_cper_snapshot_req *snapshot_req __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_snapshot_req), GFP_KERNEL);
+ struct ras_cmd_cper_snapshot_rsp *snapshot_rsp __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_snapshot_rsp), GFP_KERNEL);
+ struct ras_cmd_cper_record_req *record_req __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_record_req), GFP_KERNEL);
+ struct ras_cmd_cper_record_rsp *record_rsp __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_record_rsp), GFP_KERNEL);
+ uint8_t *ring_header __free(kfree) =
+ kzalloc(ring_header_size, GFP_KERNEL);
+ uint32_t total_cper_num;
+ uint64_t start_cper_id;
+ int r;
+
+ if (!snapshot_req || !snapshot_rsp || !record_req || !record_rsp ||
+ !ring_header)
+ return -ENOMEM;
+
+ if (!(*offset)) {
+ /* Need at least 12 bytes for the header on the first read */
+ if (size < ring_header_size)
+ return -EINVAL;
+
+ if (copy_to_user(buf, ring_header, ring_header_size))
+ return -EFAULT;
+ buf += ring_header_size;
+ size -= ring_header_size;
+ }
+
+ r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
+ RAS_CMD__GET_CPER_SNAPSHOT,
+ snapshot_req, sizeof(struct ras_cmd_cper_snapshot_req),
+ snapshot_rsp, sizeof(struct ras_cmd_cper_snapshot_rsp));
+ if (r || !snapshot_rsp->total_cper_num)
+ return r;
+
+ start_cper_id = snapshot_rsp->start_cper_id;
+ total_cper_num = snapshot_rsp->total_cper_num;
+
+ record_req->buf_ptr = (uint64_t)(uintptr_t)buf;
+ record_req->buf_size = size;
+ record_req->cper_start_id = start_cper_id + *offset;
+ record_req->cper_num = total_cper_num;
+ r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD,
+ record_req, sizeof(struct ras_cmd_cper_record_req),
+ record_rsp, sizeof(struct ras_cmd_cper_record_rsp));
+ if (r)
+ return r;
+
+ r = *offset ? record_rsp->real_data_size : record_rsp->real_data_size + ring_header_size;
+ (*offset) += record_rsp->real_cper_num;
+
+ return r;
+}
+
/* Layout of file is 12 bytes consisting of
* - rptr
* - wptr
@@ -364,8 +567,13 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
- int r, i;
uint32_t value, result, early[3];
+ uint64_t p;
+ loff_t i;
+ int r;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && amdgpu_uniras_enabled(ring->adev))
+ return amdgpu_ras_cper_debugfs_read(f, buf, size, pos);
if (*pos & 3 || size & 3)
return -EINVAL;
@@ -373,13 +581,18 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
result = 0;
if (*pos < 12) {
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_lock(&ring->adev->cper.ring_lock);
+
early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
for (i = *pos / 4; i < 3 && size; i++) {
r = put_user(early[i], (uint32_t *)buf);
- if (r)
- return r;
+ if (r) {
+ result = r;
+ goto out;
+ }
buf += 4;
result += 4;
size -= 4;
@@ -387,51 +600,145 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
}
}
- while (size) {
- if (*pos >= (ring->ring_size + 12))
- return result;
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ while (size) {
+ if (*pos >= (ring->ring_size + 12))
+ return result;
- value = ring->ring[(*pos - 12)/4];
- r = put_user(value, (uint32_t *)buf);
- if (r)
- return r;
- buf += 4;
- result += 4;
- size -= 4;
- *pos += 4;
+ value = ring->ring[(*pos - 12)/4];
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ return r;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+ } else {
+ p = early[0];
+ if (early[0] <= early[1])
+ size = (early[1] - early[0]);
+ else
+ size = ring->ring_size - (early[0] - early[1]);
+
+ while (size) {
+ if (p == early[1])
+ goto out;
+
+ value = ring->ring[p];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto out;
+ }
+
+ buf += 4;
+ result += 4;
+ size--;
+ p++;
+ p &= ring->ptr_mask;
+ }
}
+out:
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_unlock(&ring->adev->cper.ring_lock);
+
return result;
}
+static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+
+ if (*pos & 3 || size & 3)
+ return -EINVAL;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ amdgpu_virt_req_ras_cper_dump(ring->adev, false);
+
+ return amdgpu_debugfs_ring_read(f, buf, size, pos);
+}
+
static const struct file_operations amdgpu_debugfs_ring_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_ring_read,
.llseek = default_llseek
};
+static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_virt_ring_read,
+ .llseek = default_llseek
+};
+
+static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ ssize_t bytes = min_t(ssize_t, ring->mqd_size - *pos, size);
+ void *from = ((u8 *)ring->mqd_ptr) + *pos;
+
+ if (*pos > ring->mqd_size)
+ return 0;
+
+ if (copy_to_user(buf, from, bytes))
+ return -EFAULT;
+
+ *pos += bytes;
+ return bytes;
+}
+
+static const struct file_operations amdgpu_debugfs_mqd_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_mqd_read,
+ .llseek = default_llseek
+};
+
+static int amdgpu_debugfs_ring_error(void *data, u64 val)
+{
+ struct amdgpu_ring *ring = data;
+
+ amdgpu_fence_driver_set_error(ring, val);
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL,
+ amdgpu_debugfs_ring_error, "%lld\n");
+
#endif
-int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
+void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
- struct dentry *ent, *root = minor->debugfs_root;
+ struct dentry *root = minor->debugfs_root;
char name[32];
sprintf(name, "amdgpu_ring_%s", ring->name);
+ if (amdgpu_sriov_vf(adev))
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_virt_ring_fops,
+ ring->ring_size + 12);
+ else
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_ring_fops,
+ ring->ring_size + 12);
+
+ if (ring->mqd_obj) {
+ sprintf(name, "amdgpu_mqd_%s", ring->name);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_mqd_fops,
+ ring->mqd_size);
+ }
- ent = debugfs_create_file(name,
- S_IFREG | S_IRUGO, root,
- ring, &amdgpu_debugfs_ring_fops);
- if (!ent)
- return -ENOMEM;
+ sprintf(name, "amdgpu_error_%s", ring->name);
+ debugfs_create_file(name, 0200, root, ring,
+ &amdgpu_debugfs_error_fops);
- i_size_write(ent->d_inode, ring->ring_size + 12);
- ring->ent = ent;
#endif
- return 0;
}
/**
@@ -457,5 +764,164 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
ring->name);
ring->sched.ready = !r;
+
return r;
}
+
+static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+ amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+ bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+ amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
+
+ memset(prop, 0, sizeof(*prop));
+
+ prop->mqd_gpu_addr = ring->mqd_gpu_addr;
+ prop->hqd_base_gpu_addr = ring->gpu_addr;
+ prop->rptr_gpu_addr = ring->rptr_gpu_addr;
+ prop->wptr_gpu_addr = ring->wptr_gpu_addr;
+ prop->queue_size = ring->ring_size;
+ prop->eop_gpu_addr = ring->eop_gpu_addr;
+ prop->use_doorbell = ring->use_doorbell;
+ prop->doorbell_index = ring->doorbell_index;
+ prop->kernel_queue = true;
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
+
+ prop->allow_tunneling = is_high_prio_compute;
+ if (is_high_prio_compute || is_high_prio_gfx) {
+ prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+}
+
+int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_mqd *mqd_mgr;
+ struct amdgpu_mqd_prop prop;
+
+ amdgpu_ring_to_mqd_prop(ring, &prop);
+
+ ring->wptr = 0;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd_mgr = &adev->mqds[AMDGPU_HW_IP_COMPUTE];
+ else
+ mqd_mgr = &adev->mqds[ring->funcs->type];
+
+ return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
+}
+
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_begin(ring);
+}
+
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_end(ring);
+}
+
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
+}
+
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
+}
+
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
+}
+
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
+{
+ if (!ring)
+ return false;
+
+ if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
+ return false;
+
+ return true;
+}
+
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ /* Stop the scheduler to prevent anybody else from touching the ring buffer. */
+ drm_sched_wqueue_stop(&ring->sched);
+ /* back up the non-guilty commands */
+ amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);
+}
+
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ unsigned int i;
+ int r;
+
+ /* verify that the ring is functional */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+
+ /* signal the guilty fence and set an error on all fences from the context */
+ if (guilty_fence)
+ amdgpu_fence_driver_guilty_force_completion(guilty_fence);
+ /* Re-emit the non-guilty commands */
+ if (ring->ring_backup_entries_to_copy) {
+ amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
+ for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
+ amdgpu_ring_write(ring, ring->ring_backup[i]);
+ amdgpu_ring_commit(ring);
+ }
+ /* Start the scheduler again */
+ drm_sched_wqueue_start(&ring->sched);
+ return 0;
+}
+
+bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
+ u32 reset_type)
+{
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ if (ring->adev->gfx.gfx_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ if (ring->adev->gfx.compute_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ if (ring->adev->sdma.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_DEC:
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ if (ring->adev->vcn.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ if (ring->adev->jpeg.supported_reset & reset_type)
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 56acec1075ac..7a27c6c4bb44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -27,17 +27,32 @@
#include <drm/amdgpu_drm.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_print.h>
+#include <drm/drm_suballoc.h>
+
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_ib;
+struct amdgpu_cs_parser;
+struct amdgpu_job;
+struct amdgpu_vm;
/* max number of rings */
-#define AMDGPU_MAX_RINGS 28
-#define AMDGPU_MAX_HWIP_RINGS 8
+#define AMDGPU_MAX_RINGS 149
+#define AMDGPU_MAX_HWIP_RINGS 64
#define AMDGPU_MAX_GFX_RINGS 2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
-
-#define AMDGPU_RING_PRIO_DEFAULT 1
-#define AMDGPU_RING_PRIO_MAX AMDGPU_GFX_PIPE_PRIO_MAX
+#define AMDGPU_MAX_VPE_RINGS 2
+
+enum amdgpu_ring_priority_level {
+ AMDGPU_RING_PRIO_0,
+ AMDGPU_RING_PRIO_1,
+ AMDGPU_RING_PRIO_DEFAULT = 1,
+ AMDGPU_RING_PRIO_2,
+ AMDGPU_RING_PRIO_MAX
+};
/* some special values for the owner field */
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
@@ -47,6 +62,7 @@
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC (1 << 3)
#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
@@ -62,8 +78,12 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC,
AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC,
AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG,
+ AMDGPU_RING_TYPE_VPE = AMDGPU_HW_IP_VPE,
AMDGPU_RING_TYPE_KIQ,
- AMDGPU_RING_TYPE_MES
+ AMDGPU_RING_TYPE_MES,
+ AMDGPU_RING_TYPE_UMSCH_MM,
+ AMDGPU_RING_TYPE_CPER,
+ AMDGPU_RING_TYPE_MAX,
};
enum amdgpu_ib_pool_type {
@@ -77,11 +97,13 @@ enum amdgpu_ib_pool_type {
AMDGPU_IB_POOL_MAX
};
-struct amdgpu_device;
-struct amdgpu_ring;
-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
+struct amdgpu_ib {
+ struct drm_suballoc *sa_bo;
+ uint32_t length_dw;
+ uint64_t gpu_addr;
+ uint32_t *ptr;
+ uint32_t flags;
+};
struct amdgpu_sched {
u32 num_scheds;
@@ -93,10 +115,11 @@ struct amdgpu_sched {
*/
struct amdgpu_fence_driver {
uint64_t gpu_addr;
- volatile uint32_t *cpu_addr;
+ uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
uint32_t sync_seq;
atomic_t last_seq;
+ u64 signalled_wptr;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
@@ -106,19 +129,44 @@ struct amdgpu_fence_driver {
struct dma_fence **fences;
};
-int amdgpu_fence_driver_init(struct amdgpu_device *adev);
-void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
+/*
+ * Fences mark an event in the GPUs pipeline and are used
+ * for GPU/CPU synchronization. When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the relevant GPU caches have been flushed.
+ */
+
+struct amdgpu_fence {
+ struct dma_fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+ ktime_t start_timestamp;
+
+ /* wptr for the fence for resets */
+ u64 wptr;
+ /* fence context for resets */
+ u64 context;
+};
+
+extern const struct drm_sched_backend_ops amdgpu_sched_ops;
+
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af);
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
- unsigned num_hw_submission);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
-void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
-void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
- unsigned flags);
+void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
+void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
+int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
+void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
@@ -128,27 +176,64 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
+
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
+ ktime_t timestamp);
+
/*
* Rings.
*/
/* provided by hw blocks that expose a ring buffer for commands */
struct amdgpu_ring_funcs {
+ /**
+ * @type:
+ *
+ * GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER
+ * use ring buffers. The type field just identifies which component the
+ * ring buffer is associated with.
+ */
enum amdgpu_ring_type type;
uint32_t align_mask;
+
+ /**
+ * @nop:
+ *
+ * Every block in the amdgpu has no-op instructions (e.g., GFX 10
+ * uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP,
+ * etc). This field receives the specific no-op for the component
+ * that initializes the ring.
+ */
u32 nop;
bool support_64bit_ptrs;
bool no_user_fence;
- unsigned vmhub;
- unsigned extra_dw;
+ bool secure_submission_supported;
+
+ /**
+ * @extra_bytes:
+ *
+ * Optional extra space in bytes that is added to the ring size
+ * when allocating the BO that holds the contents of the ring.
+ * This space isn't used for command submission to the ring,
+ * but is just there to satisfy some hardware requirements or
+ * implement workarounds. It's up to the implementation of each
+ * specific ring to initialize this space.
+ */
+ unsigned extra_bytes;
/* ring read/write ptr handling */
u64 (*get_rptr)(struct amdgpu_ring *ring);
u64 (*get_wptr)(struct amdgpu_ring *ring);
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
- int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
- int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+ int (*parse_cs)(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+ int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
/* constants to calculate how many DW are needed for an emit */
unsigned emit_frame_size;
unsigned emit_ib_size;
@@ -176,13 +261,14 @@ struct amdgpu_ring_funcs {
void (*insert_end)(struct amdgpu_ring *ring);
/* pad the indirect buffer to the necessary number of dw */
void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
- unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
- void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+ unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr);
/* note usage for clock and power gating */
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+ void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow, int vmid);
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
@@ -198,8 +284,17 @@ struct amdgpu_ring_funcs {
int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring);
void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
+ void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
+ int (*reset)(struct amdgpu_ring *ring, unsigned int vmid,
+ struct amdgpu_fence *timedout_fence);
+ void (*emit_cleaner_shader)(struct amdgpu_ring *ring);
};
+/**
+ * amdgpu_ring - Holds ring information
+ */
struct amdgpu_ring {
struct amdgpu_device *adev;
const struct amdgpu_ring_funcs *funcs;
@@ -207,53 +302,128 @@ struct amdgpu_ring {
struct drm_gpu_scheduler sched;
struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
+ uint32_t *ring;
+ /* backups for resets */
+ uint32_t *ring_backup;
+ unsigned int ring_backup_entries_to_copy;
unsigned rptr_offs;
+ u64 rptr_gpu_addr;
+ u32 *rptr_cpu_addr;
+
+ /**
+ * @wptr:
+ *
+ * This is part of the Ring buffer implementation and represents the
+ * write pointer. The wptr determines where the host has written.
+ */
u64 wptr;
+
+ /**
+ * @wptr_old:
+ *
+ * Before update wptr with the new value, usually the old value is
+ * stored in the wptr_old.
+ */
u64 wptr_old;
unsigned ring_size;
+
+ /**
+ * @max_dw:
+ *
+ * Maximum number of DWords for ring allocation. This information is
+ * provided at the ring initialization time, and each IP block can
+ * specify a specific value. Check places that invoke
+ * amdgpu_ring_init() to see the maximum size per block.
+ */
unsigned max_dw;
+
+ /**
+ * @count_dw:
+ *
+ * This value starts with the maximum amount of DWords supported by the
+ * ring. This value is updated based on the ring manipulation.
+ */
int count_dw;
uint64_t gpu_addr;
+
+ /**
+ * @ptr_mask:
+ *
+ * Some IPs provide support for 64-bit pointers and others for 32-bit
+ * only; this behavior is component-specific and defined by the field
+ * support_64bit_ptr. If the IP block supports 64-bits, the mask
+ * 0xffffffffffffffff is set; otherwise, this value assumes buf_mask.
+ * Notice that this field is used to keep wptr under a valid range.
+ */
uint64_t ptr_mask;
+
+ /**
+ * @buf_mask:
+ *
+ * Buffer mask is a value used to keep wptr count under its
+ * thresholding. Buffer mask initialized during the ring buffer
+ * initialization time, and it is defined as (ring_size / 4) -1.
+ */
uint32_t buf_mask;
u32 idx;
+ u32 xcc_id;
+ u32 xcp_id;
u32 me;
u32 pipe;
u32 queue;
struct amdgpu_bo *mqd_obj;
uint64_t mqd_gpu_addr;
void *mqd_ptr;
+ unsigned mqd_size;
uint64_t eop_gpu_addr;
u32 doorbell_index;
bool use_doorbell;
bool use_pollmem;
unsigned wptr_offs;
+ u64 wptr_gpu_addr;
+
+ /**
+ * @wptr_cpu_addr:
+ *
+ * This is the CPU address pointer in the writeback slot. This is used
+ * to commit changes to the GPU.
+ */
+ u32 *wptr_cpu_addr;
unsigned fence_offs;
+ u64 fence_gpu_addr;
+ u32 *fence_cpu_addr;
uint64_t current_ctx;
char name[16];
u32 trail_seq;
unsigned trail_fence_offs;
u64 trail_fence_gpu_addr;
- volatile u32 *trail_fence_cpu_addr;
+ u32 *trail_fence_cpu_addr;
unsigned cond_exe_offs;
u64 cond_exe_gpu_addr;
- volatile u32 *cond_exe_cpu_addr;
+ u32 *cond_exe_cpu_addr;
+ unsigned int set_q_mode_offs;
+ u32 *set_q_mode_ptr;
+ u64 set_q_mode_token;
+ unsigned vm_hub;
unsigned vm_inv_eng;
struct dma_fence *vmid_wait;
bool has_compute_vm_bug;
bool no_scheduler;
+ bool no_user_submission;
int hw_prio;
+ unsigned num_hw_submission;
+ atomic_t *sched_score;
-#if defined(CONFIG_DEBUG_FS)
- struct dentry *ent;
-#endif
+ bool is_sw_ring;
+ unsigned int entry_index;
+ /* store the cached rptr to restore after reset */
+ uint64_t cached_rptr;
};
-#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
+#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
-#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
+#define amdgpu_ring_test_ib(r, t) ((r)->funcs->test_ib ? (r)->funcs->test_ib((r), (t)) : 0)
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
@@ -265,24 +435,36 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)))
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
-#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
-#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
+#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
+#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
+#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
+#define amdgpu_ring_reset(r, v, f) (r)->funcs->reset((r), (v), (f))
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
+
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
void amdgpu_ring_undo(struct amdgpu_ring *ring);
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
- unsigned int ring_size, struct amdgpu_irq_src *irq_src,
- unsigned int irq_type, unsigned int prio);
+ unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type, unsigned int hw_prio,
+ atomic_t *sched_score);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t val0,
@@ -298,16 +480,11 @@ static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
- int i = 0;
- while (i <= ring->buf_mask)
- ring->ring[i++] = ring->funcs->nop;
-
+ memset32(ring->ring, ring->funcs->nop, ring->buf_mask + 1);
}
static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
- if (ring->count_dw <= 0)
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
ring->ring[ring->wptr++ & ring->buf_mask] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
@@ -317,26 +494,20 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
void *src, int count_dw)
{
unsigned occupied, chunk1, chunk2;
- void *dst;
-
- if (unlikely(ring->count_dw < count_dw))
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
occupied = ring->wptr & ring->buf_mask;
- dst = (void *)&ring->ring[occupied];
chunk1 = ring->buf_mask + 1 - occupied;
- chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+ chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
chunk2 = count_dw - chunk1;
chunk1 <<= 2;
chunk2 <<= 2;
if (chunk1)
- memcpy(dst, src, chunk1);
+ memcpy(&ring->ring[occupied], src, chunk1);
if (chunk2) {
src += chunk1;
- dst = (void *)ring->ring;
- memcpy(dst, src, chunk2);
+ memcpy(ring->ring, src, chunk2);
}
ring->wptr += count_dw;
@@ -344,10 +515,66 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
ring->count_dw -= count_dw;
}
+/**
+ * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
+ * @ring: amdgpu_ring structure
+ * @offset: offset returned by amdgpu_ring_init_cond_exec
+ *
+ * Calculate the dw count and patch it into a cond_exec command.
+ */
+static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned int offset)
+{
+ unsigned cur;
+
+ if (!ring->funcs->init_cond_exec)
+ return;
+
+ WARN_ON(offset > ring->buf_mask);
+ WARN_ON(ring->ring[offset] != 0);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur < offset)
+ cur += ring->ring_size >> 2;
+ ring->ring[offset] = cur - offset;
+}
+
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
-int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
- struct amdgpu_ring *ring);
-void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);
+void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+
+int amdgpu_ring_init_mqd(struct amdgpu_ring *ring);
+
+static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
+{
+ return ib->ptr[idx];
+}
+
+static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
+ uint32_t value)
+{
+ ib->ptr[idx] = value;
+}
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned size,
+ enum amdgpu_ib_pool_type pool,
+ struct amdgpu_ib *ib);
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ struct amdgpu_ib *ibs, struct amdgpu_job *job,
+ struct dma_fence **f);
+int amdgpu_ib_pool_init(struct amdgpu_device *adev);
+void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
+int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
+ u32 reset_type);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..7e7d6c3865bc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,576 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/slab.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
+#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
+
+static const struct ring_info {
+ unsigned int hw_pio;
+ const char *ring_name;
+} sw_ring_info[] = {
+ { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+ { AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
+static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring)
+{
+ return ring->entry_index < mux->ring_entry_size ?
+ &mux->ring_entry[ring->entry_index] : NULL;
+}
+
+/* copy packages on sw ring range[begin, end) */
+static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring,
+ u64 s_start, u64 s_end)
+{
+ u64 start, end;
+ struct amdgpu_ring *real_ring = mux->real_ring;
+
+ start = s_start & ring->buf_mask;
+ end = s_end & ring->buf_mask;
+
+ if (start == end) {
+ DRM_ERROR("no more data copied from sw ring\n");
+ return;
+ }
+ if (start > end) {
+ amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start],
+ (ring->ring_size >> 2) - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
+ } else {
+ amdgpu_ring_alloc(real_ring, end - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start);
+ }
+}
+
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e = NULL;
+ struct amdgpu_mux_chunk *chunk;
+ uint32_t seq, last_seq;
+ int i;
+
+ /*find low priority entries:*/
+ if (!mux->s_resubmit)
+ return;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ e = &mux->ring_entry[i];
+ break;
+ }
+ }
+
+ if (!e) {
+ DRM_ERROR("%s no low priority ring found\n", __func__);
+ return;
+ }
+
+ last_seq = atomic_read(&e->ring->fence_drv.last_seq);
+ seq = mux->seqno_to_resubmit;
+ if (last_seq < seq) {
+ /*resubmit all the fences between (last_seq, seq]*/
+ list_for_each_entry(chunk, &e->list, entry) {
+ if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
+ amdgpu_fence_update_start_timestamp(e->ring,
+ chunk->sync_seq,
+ ktime_get());
+ if (chunk->sync_seq ==
+ le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
+ if (chunk->cntl_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_cntl(e->ring,
+ chunk->cntl_offset);
+ if (chunk->ce_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
+ if (chunk->de_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_de(e->ring, chunk->de_offset);
+ }
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
+ chunk->start,
+ chunk->end);
+ mux->wptr_resubmit = chunk->end;
+ amdgpu_ring_commit(mux->real_ring);
+ }
+ }
+ }
+
+ timer_delete(&mux->resubmit_timer);
+ mux->s_resubmit = false;
+}
+
+static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
+{
+ mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
+}
+
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
+{
+ struct amdgpu_ring_mux *mux = timer_container_of(mux, t,
+ resubmit_timer);
+
+ if (!spin_trylock(&mux->lock)) {
+ amdgpu_ring_mux_schedule_resubmit(mux);
+ DRM_ERROR("reschedule resubmit\n");
+ return;
+ }
+ amdgpu_mux_resubmit_chunks(mux);
+ spin_unlock(&mux->lock);
+}
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size)
+{
+ mux->real_ring = ring;
+ mux->num_ring_entries = 0;
+
+ mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
+ if (!mux->ring_entry)
+ return -ENOMEM;
+
+ mux->ring_entry_size = entry_size;
+ mux->s_resubmit = false;
+
+ amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN);
+ if (!amdgpu_mux_chunk_slab) {
+ DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&mux->lock);
+ timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
+
+ return 0;
+}
+
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk, *chunk2;
+ int i;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ e = &mux->ring_entry[i];
+ list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
+ list_del(&chunk->entry);
+ kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+ }
+ }
+ kmem_cache_destroy(amdgpu_mux_chunk_slab);
+ kfree(mux->ring_entry);
+ mux->ring_entry = NULL;
+ mux->num_ring_entries = 0;
+ mux->ring_entry_size = 0;
+}
+
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+
+ if (mux->num_ring_entries >= mux->ring_entry_size) {
+ DRM_ERROR("add sw ring exceeding max entry size\n");
+ return -ENOENT;
+ }
+
+ e = &mux->ring_entry[mux->num_ring_entries];
+ ring->entry_index = mux->num_ring_entries;
+ e->ring = ring;
+
+ INIT_LIST_HEAD(&e->list);
+ mux->num_ring_entries += 1;
+ return 0;
+}
+
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
+{
+ struct amdgpu_mux_entry *e;
+
+ spin_lock(&mux->lock);
+
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
+ amdgpu_mux_resubmit_chunks(mux);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ /* We could skip this set wptr as preemption in process. */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ e->sw_cptr = e->sw_wptr;
+ /* Update cptr if the package already copied in resubmit functions */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
+ e->sw_cptr = mux->wptr_resubmit;
+ e->sw_wptr = wptr;
+ e->start_ptr_in_hw_ring = mux->real_ring->wptr;
+
+ /* Skip copying for the packages already resubmitted.*/
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ amdgpu_ring_commit(mux->real_ring);
+ } else {
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ }
+ spin_unlock(&mux->lock);
+}
+
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ return 0;
+ }
+
+ return e->sw_wptr;
+}
+
+/**
+ * amdgpu_ring_mux_get_rptr - get the readptr of the software ring
+ * @mux: the multiplexer the software rings attach to
+ * @ring: the software ring of which we calculate the readptr
+ *
+ * The return value of the readptr is not precise while the other rings could
+ * write data onto the real ring buffer.After overwriting on the real ring, we
+ * can not decide if our packages have been excuted or not read yet. However,
+ * this function is only called by the tools such as umr to collect the latest
+ * packages for the hang analysis. We assume the hang happens near our latest
+ * submit. Thus we could use the following logic to give the clue:
+ * If the readptr is between start and end, then we return the copy pointer
+ * plus the distance from start to readptr. If the readptr is before start, we
+ * return the copy pointer. Lastly, if the readptr is past end, we return the
+ * write pointer.
+ */
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ u64 readp, offset, start, end;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("no sw entry found!\n");
+ return 0;
+ }
+
+ readp = amdgpu_ring_get_rptr(mux->real_ring);
+
+ start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ if (start > end) {
+ if (readp <= end)
+ readp += mux->real_ring->ring_size >> 2;
+ end += mux->real_ring->ring_size >> 2;
+ }
+
+ if (start <= readp && readp <= end) {
+ offset = readp - start;
+ e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
+ } else if (readp < start) {
+ e->sw_rptr = e->sw_cptr;
+ } else {
+ /* end < readptr */
+ e->sw_rptr = e->sw_wptr;
+ }
+
+ return e->sw_rptr;
+}
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ return amdgpu_ring_mux_get_rptr(mux, ring);
+}
+
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ return amdgpu_ring_mux_get_wptr(mux, ring);
+}
+
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr);
+}
+
+/* Override insert_nop to prevent emitting nops to the software rings */
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ WARN_ON(!ring->is_sw_ring);
+}
+
+const char *amdgpu_sw_ring_name(int idx)
+{
+ return idx < ARRAY_SIZE(sw_ring_info) ?
+ sw_ring_info[idx].ring_name : NULL;
+}
+
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+ return idx < ARRAY_SIZE(sw_ring_info) ?
+ sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
+}
+
+/*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/
+static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_ring *ring;
+ int i, need_preempt;
+
+ need_preempt = 0;
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ ring = mux->ring_entry[i].ring;
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
+ amdgpu_fence_count_emitted(ring) > 0)
+ return 0;
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
+ amdgpu_fence_last_unsignaled_time_us(ring) >
+ AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
+ need_preempt = 1;
+ }
+ return need_preempt && !mux->s_resubmit;
+}
+
+/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. */
+static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+ int r;
+
+ spin_lock(&mux->lock);
+ mux->pending_trailing_fence_signaled = true;
+ r = amdgpu_ring_preempt_ib(mux->real_ring);
+ spin_unlock(&mux->lock);
+ return r;
+}
+
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+ if (amdgpu_mcbp_scan(mux) > 0)
+ amdgpu_mcbp_trigger_preempt(mux);
+ return;
+ }
+
+ amdgpu_ring_mux_start_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+ return;
+ amdgpu_ring_mux_end_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+ unsigned offset;
+
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+ return;
+
+ offset = ring->wptr & ring->buf_mask;
+
+ amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
+}
+
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ spin_lock(&mux->lock);
+ amdgpu_mux_resubmit_chunks(mux);
+ spin_unlock(&mux->lock);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
+ if (!chunk) {
+ DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
+ return;
+ }
+
+ chunk->start = ring->wptr;
+ /* the initialized value used to check if they are set by the ib submission*/
+ chunk->cntl_offset = ring->buf_mask + 1;
+ chunk->de_offset = ring->buf_mask + 1;
+ chunk->ce_offset = ring->buf_mask + 1;
+ list_add_tail(&chunk->entry, &e->list);
+}
+
+static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ uint32_t last_seq = 0;
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk, *tmp;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+
+ list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
+ if (chunk->sync_seq <= last_seq) {
+ list_del(&chunk->entry);
+ kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+ }
+ }
+}
+
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring, u64 offset,
+ enum amdgpu_ring_mux_offset_type type)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+ if (!chunk) {
+ DRM_ERROR("cannot find chunk!\n");
+ return;
+ }
+
+ switch (type) {
+ case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
+ chunk->cntl_offset = offset;
+ break;
+ case AMDGPU_MUX_OFFSET_TYPE_DE:
+ chunk->de_offset = offset;
+ break;
+ case AMDGPU_MUX_OFFSET_TYPE_CE:
+ chunk->ce_offset = offset;
+ break;
+ default:
+ DRM_ERROR("invalid type (%d)\n", type);
+ break;
+ }
+}
+
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+ if (!chunk) {
+ DRM_ERROR("cannot find chunk!\n");
+ return;
+ }
+
+ chunk->end = ring->wptr;
+ chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+ scan_and_remove_signaled_chunk(mux, ring);
+}
+
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_ring *ring = NULL;
+ int i;
+
+ if (!mux->pending_trailing_fence_signaled)
+ return false;
+
+ if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
+ return false;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ e = &mux->ring_entry[i];
+ if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ ring = e->ring;
+ break;
+ }
+ }
+
+ if (!ring) {
+ DRM_ERROR("cannot find low priority ring\n");
+ return false;
+ }
+
+ amdgpu_fence_process(ring);
+ if (amdgpu_fence_count_emitted(ring) > 0) {
+ mux->s_resubmit = true;
+ mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
+ amdgpu_ring_mux_schedule_resubmit(mux);
+ }
+
+ mux->pending_trailing_fence_signaled = false;
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
new file mode 100644
index 000000000000..d3186b570b82
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RING_MUX__
+#define __AMDGPU_RING_MUX__
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include "amdgpu_ring.h"
+
+struct amdgpu_ring;
+
+/**
+ * struct amdgpu_mux_entry - the entry recording software rings copying information.
+ * @ring: the pointer to the software ring.
+ * @start_ptr_in_hw_ring: last start location copied to in the hardware ring.
+ * @end_ptr_in_hw_ring: last end location copied to in the hardware ring.
+ * @sw_cptr: the position of the copy pointer in the sw ring.
+ * @sw_rptr: the read pointer in software ring.
+ * @sw_wptr: the write pointer in software ring.
+ * @list: list head for amdgpu_mux_chunk
+ */
+struct amdgpu_mux_entry {
+ struct amdgpu_ring *ring;
+ u64 start_ptr_in_hw_ring;
+ u64 end_ptr_in_hw_ring;
+ u64 sw_cptr;
+ u64 sw_rptr;
+ u64 sw_wptr;
+ struct list_head list;
+};
+
+enum amdgpu_ring_mux_offset_type {
+ AMDGPU_MUX_OFFSET_TYPE_CONTROL,
+ AMDGPU_MUX_OFFSET_TYPE_DE,
+ AMDGPU_MUX_OFFSET_TYPE_CE,
+};
+
+enum ib_complete_status {
+ /* IB not started/reset value, default value. */
+ IB_COMPLETION_STATUS_DEFAULT = 0,
+ /* IB preempted, started but not completed. */
+ IB_COMPLETION_STATUS_PREEMPTED = 1,
+ /* IB completed. */
+ IB_COMPLETION_STATUS_COMPLETED = 2,
+};
+
+struct amdgpu_ring_mux {
+ struct amdgpu_ring *real_ring;
+
+ struct amdgpu_mux_entry *ring_entry;
+ unsigned int num_ring_entries;
+ unsigned int ring_entry_size;
+ /*the lock for copy data from different software rings*/
+ spinlock_t lock;
+ bool s_resubmit;
+ uint32_t seqno_to_resubmit;
+ u64 wptr_resubmit;
+ struct timer_list resubmit_timer;
+
+ bool pending_trailing_fence_signaled;
+};
+
+/**
+ * struct amdgpu_mux_chunk - save the location of indirect buffer's package on softare rings.
+ * @entry: the list entry.
+ * @sync_seq: the fence seqno related with the saved IB.
+ * @start:- start location on the software ring.
+ * @end:- end location on the software ring.
+ * @control_offset:- the PRE_RESUME bit position used for resubmission.
+ * @de_offset:- the anchor in write_data for de meta of resubmission.
+ * @ce_offset:- the anchor in write_data for ce meta of resubmission.
+ */
+struct amdgpu_mux_chunk {
+ struct list_head entry;
+ uint32_t sync_seq;
+ u64 start;
+ u64 end;
+ u64 cntl_offset;
+ u64 de_offset;
+ u64 ce_offset;
+};
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size);
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ u64 offset, enum amdgpu_ring_mux_offset_type type);
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type);
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 6373bfb47d55..5aa830a02d80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -31,12 +31,13 @@
* amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
*
* @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
*
* Set RLC enter into safe mode if RLC is enabled and haven't in safe mode.
*/
-void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
- if (adev->gfx.rlc.in_safe_mode)
+ if (adev->gfx.rlc.in_safe_mode[xcc_id])
return;
/* if RLC is not enabled, do nothing */
@@ -46,8 +47,8 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
- adev->gfx.rlc.funcs->set_safe_mode(adev);
- adev->gfx.rlc.in_safe_mode = true;
+ adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = true;
}
}
@@ -55,12 +56,13 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
* amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
*
* @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
*
* Set RLC exit safe mode if RLC is enabled and have entered into safe mode.
*/
-void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
- if (!(adev->gfx.rlc.in_safe_mode))
+ if (!(adev->gfx.rlc.in_safe_mode[xcc_id]))
return;
/* if RLC is not enabled, do nothing */
@@ -70,8 +72,8 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
- adev->gfx.rlc.funcs->unset_safe_mode(adev);
- adev->gfx.rlc.in_safe_mode = false;
+ adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = false;
}
}
@@ -87,13 +89,14 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
u32 i;
int r;
/* allocate save restore block */
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
@@ -130,7 +133,8 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
@@ -156,7 +160,8 @@ int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
int r;
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
@@ -184,7 +189,7 @@ int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
{
const __le32 *fw_data;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
int me, i, max_me;
u32 bo_offset = 0;
u32 table_offset, table_size;
@@ -236,7 +241,7 @@ void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
table_size = le32_to_cpu(hdr->jt_size);
}
- for (i = 0; i < table_size; i ++) {
+ for (i = 0; i < table_size; i++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
@@ -272,3 +277,274 @@ void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
}
+
+static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *common_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+ unsigned int *tmp;
+ unsigned int i;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
+ le32_to_cpu(rlc_hdr->save_and_restore_offset);
+ adev->gfx.rlc.clear_state_descriptor_offset =
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+ adev->gfx.rlc.avail_scratch_ram_locations =
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+ adev->gfx.rlc.reg_restore_list_size =
+ le32_to_cpu(rlc_hdr->reg_restore_list_size);
+ adev->gfx.rlc.reg_list_format_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_start);
+ adev->gfx.rlc.reg_list_format_separate_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+ adev->gfx.rlc.starting_offsets_start =
+ le32_to_cpu(rlc_hdr->starting_offsets_start);
+ adev->gfx.rlc.reg_list_format_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+ adev->gfx.rlc.reg_list_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+ adev->gfx.rlc.register_list_format =
+ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n");
+ return -ENOMEM;
+ }
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ if (info->fw) {
+ common_hdr = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+ return 0;
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+ adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+ adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+ adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+ adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+ adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+ adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+ adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+ adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+ adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+ le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.save_restore_list_cntl_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_gpm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_srm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
+ adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlc_iram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlcp_ucode_version = le32_to_cpu(rlc_hdr->rlcp_ucode_version);
+ adev->gfx.rlcp_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcp_ucode_feature_version);
+ adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes);
+ adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes);
+
+ adev->gfx.rlcv_ucode_version = le32_to_cpu(rlc_hdr->rlcv_ucode_version);
+ adev->gfx.rlcv_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcv_ucode_feature_version);
+ adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes);
+ adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlcp_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_P;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlcv_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_V;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_4 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor)
+{
+ int err;
+
+ if (version_major < 2) {
+ /* only support rlc_hdr v2.x and onwards */
+ dev_err(adev->dev, "unsupported rlc fw hdr\n");
+ return -EINVAL;
+ }
+
+ /* is_rlc_v2_1 is still used in APU code path */
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
+ err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+ if (err) {
+ dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+ return err;
+ }
+
+ if (version_minor >= 1)
+ amdgpu_gfx_rlc_init_microcode_v2_1(adev);
+ if (version_minor >= 2)
+ amdgpu_gfx_rlc_init_microcode_v2_2(adev);
+ if (version_minor == 3)
+ amdgpu_gfx_rlc_init_microcode_v2_3(adev);
+ if (version_minor == 4)
+ amdgpu_gfx_rlc_init_microcode_v2_4(adev);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index aeaaae713c59..2ce310b31942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -26,6 +26,8 @@
#include "clearstate_defs.h"
+#define AMDGPU_MAX_RLC_INSTANCES 8
+
/* firmware ID used in rlc toc */
typedef enum _FIRMWARE_ID_ {
FIRMWARE_ID_INVALID = 0,
@@ -69,6 +71,94 @@ typedef enum _FIRMWARE_ID_ {
FIRMWARE_ID_MAX = 38,
} FIRMWARE_ID;
+typedef enum _SOC21_FIRMWARE_ID_ {
+ SOC21_FIRMWARE_ID_INVALID = 0,
+ SOC21_FIRMWARE_ID_RLC_G_UCODE = 1,
+ SOC21_FIRMWARE_ID_RLC_TOC = 2,
+ SOC21_FIRMWARE_ID_RLCG_SCRATCH = 3,
+ SOC21_FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ SOC21_FIRMWARE_ID_RLC_P_UCODE = 5,
+ SOC21_FIRMWARE_ID_RLC_V_UCODE = 6,
+ SOC21_FIRMWARE_ID_RLX6_UCODE = 7,
+ SOC21_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
+ SOC21_FIRMWARE_ID_CP_PFP = 13,
+ SOC21_FIRMWARE_ID_CP_ME = 14,
+ SOC21_FIRMWARE_ID_CP_MEC = 15,
+ SOC21_FIRMWARE_ID_RS64_MES_P0 = 16,
+ SOC21_FIRMWARE_ID_RS64_MES_P1 = 17,
+ SOC21_FIRMWARE_ID_RS64_PFP = 18,
+ SOC21_FIRMWARE_ID_RS64_ME = 19,
+ SOC21_FIRMWARE_ID_RS64_MEC = 20,
+ SOC21_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
+ SOC21_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
+ SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
+ SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
+ SOC21_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
+ SOC21_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
+ SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
+ SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
+ SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
+ SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
+ SOC21_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
+ SOC21_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
+ SOC21_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
+ SOC21_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_SR = 35,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
+ SOC21_FIRMWARE_ID_MAX = 37
+} SOC21_FIRMWARE_ID;
+
+typedef enum _SOC24_FIRMWARE_ID_ {
+ SOC24_FIRMWARE_ID_INVALID = 0,
+ SOC24_FIRMWARE_ID_RLC_G_UCODE = 1,
+ SOC24_FIRMWARE_ID_RLC_TOC = 2,
+ SOC24_FIRMWARE_ID_RLCG_SCRATCH = 3,
+ SOC24_FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ SOC24_FIRMWARE_ID_RLC_P_UCODE = 5,
+ SOC24_FIRMWARE_ID_RLC_V_UCODE = 6,
+ SOC24_FIRMWARE_ID_RLX6_UCODE = 7,
+ SOC24_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
+ SOC24_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
+ SOC24_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
+ SOC24_FIRMWARE_ID_CP_PFP = 13,
+ SOC24_FIRMWARE_ID_CP_ME = 14,
+ SOC24_FIRMWARE_ID_CP_MEC = 15,
+ SOC24_FIRMWARE_ID_RS64_MES_P0 = 16,
+ SOC24_FIRMWARE_ID_RS64_MES_P1 = 17,
+ SOC24_FIRMWARE_ID_RS64_PFP = 18,
+ SOC24_FIRMWARE_ID_RS64_ME = 19,
+ SOC24_FIRMWARE_ID_RS64_MEC = 20,
+ SOC24_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
+ SOC24_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
+ SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
+ SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
+ SOC24_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
+ SOC24_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
+ SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
+ SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
+ SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
+ SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
+ SOC24_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
+ SOC24_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
+ SOC24_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
+ SOC24_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_SR = 35,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
+ SOC24_FIRMWARE_ID_RLCDEBUGLOG = 37,
+ SOC24_FIRMWARE_ID_SRIOV_DEBUG = 38,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_RLC = 39,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_SDMA = 40,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_CP = 41,
+ SOC24_FIRMWARE_ID_UMF_ZONE_PAD = 42,
+ SOC24_FIRMWARE_ID_MAX = 43
+} SOC24_FIRMWARE_ID;
+
typedef struct _RLC_TABLE_OF_CONTENT {
union {
unsigned int DW0;
@@ -112,46 +202,96 @@ typedef struct _RLC_TABLE_OF_CONTENT {
};
} RLC_TABLE_OF_CONTENT;
+typedef struct _RLC_TABLE_OF_CONTENT_V2 {
+ union {
+ unsigned int DW0;
+ struct {
+ uint32_t offset : 25;
+ uint32_t id : 7;
+ };
+ };
+
+ union {
+ unsigned int DW1;
+ struct {
+ uint32_t reserved0 : 1;
+ uint32_t reserved1 : 1;
+ uint32_t reserved2 : 1;
+ uint32_t memory_destination : 2;
+ uint32_t vfflr_image_code : 4;
+ uint32_t reserved9 : 1;
+ uint32_t reserved10 : 1;
+ uint32_t reserved11 : 1;
+ uint32_t size_x16 : 1;
+ uint32_t reserved13 : 1;
+ uint32_t size : 18;
+ };
+ };
+} RLC_TABLE_OF_CONTENT_V2;
+
#define RLC_TOC_MAX_SIZE 64
struct amdgpu_rlc_funcs {
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
- void (*set_safe_mode)(struct amdgpu_device *adev);
- void (*unset_safe_mode)(struct amdgpu_device *adev);
+ void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id);
+ void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
int (*init)(struct amdgpu_device *adev);
u32 (*get_csb_size)(struct amdgpu_device *adev);
- void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
+
+ /**
+ * @get_csb_buffer: Get the clear state to be put into the hardware.
+ *
+ * The parameter adev is used to get the CS data and other gfx info,
+ * and buffer is the RLC CS pointer
+ *
+ * Sometimes, the user space puts a request to clear the state in the
+ * command buffer; this function provides the clear state that gets put
+ * into the hardware. Note that the driver programs Clear State
+ * Indirect Buffer (CSB) explicitly when it sets up the kernel rings,
+ * and it also provides a pointer to it which is used by the firmware
+ * to load the clear state in some cases.
+ */
+ void (*get_csb_buffer)(struct amdgpu_device *adev, u32 *buffer);
int (*get_cp_table_num)(struct amdgpu_device *adev);
int (*resume)(struct amdgpu_device *adev);
void (*stop)(struct amdgpu_device *adev);
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
- void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
- void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+ void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid);
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
};
+struct amdgpu_rlcg_reg_access_ctrl {
+ uint32_t scratch_reg0;
+ uint32_t scratch_reg1;
+ uint32_t scratch_reg2;
+ uint32_t scratch_reg3;
+ uint32_t grbm_cntl;
+ uint32_t grbm_idx;
+ uint32_t spare_int;
+};
+
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
- volatile uint32_t *sr_ptr;
+ uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
- volatile uint32_t *cs_ptr;
+ uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
- volatile uint32_t *cp_table_ptr;
+ uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
- bool in_safe_mode;
+ bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES];
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
@@ -170,6 +310,13 @@ struct amdgpu_rlc {
u32 save_restore_list_srm_size_bytes;
u32 rlc_iram_ucode_size_bytes;
u32 rlc_dram_ucode_size_bytes;
+ u32 rlcp_ucode_size_bytes;
+ u32 rlcv_ucode_size_bytes;
+ u32 global_tap_delays_ucode_size_bytes;
+ u32 se0_tap_delays_ucode_size_bytes;
+ u32 se1_tap_delays_ucode_size_bytes;
+ u32 se2_tap_delays_ucode_size_bytes;
+ u32 se3_tap_delays_ucode_size_bytes;
u32 *register_list_format;
u32 *register_restore;
@@ -178,6 +325,13 @@ struct amdgpu_rlc {
u8 *save_restore_list_srm;
u8 *rlc_iram_ucode;
u8 *rlc_dram_ucode;
+ u8 *rlcp_ucode;
+ u8 *rlcv_ucode;
+ u8 *global_tap_delays_ucode;
+ u8 *se0_tap_delays_ucode;
+ u8 *se1_tap_delays_ucode;
+ u8 *se2_tap_delays_ucode;
+ u8 *se3_tap_delays_ucode;
bool is_rlc_v2_1;
@@ -190,14 +344,20 @@ struct amdgpu_rlc {
struct amdgpu_bo *rlc_toc_bo;
uint64_t rlc_toc_gpu_addr;
void *rlc_toc_buf;
+
+ bool rlcg_reg_access_supported;
+ /* registers for rlcg indirect reg access */
+ struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl[AMDGPU_MAX_RLC_INSTANCES];
};
-void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
-void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
-
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 524d10b21041..39070b2a4c04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -44,327 +44,62 @@
#include "amdgpu.h"
-static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
-static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager);
-
int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager,
- unsigned size, u32 align, u32 domain)
+ unsigned int size, u32 suballoc_align, u32 domain)
{
- int i, r;
-
- init_waitqueue_head(&sa_manager->wq);
- sa_manager->bo = NULL;
- sa_manager->size = size;
- sa_manager->domain = domain;
- sa_manager->align = align;
- sa_manager->hole = &sa_manager->olist;
- INIT_LIST_HEAD(&sa_manager->olist);
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- INIT_LIST_HEAD(&sa_manager->flist[i]);
+ int r;
- r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
- &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
+ r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, domain,
+ &sa_manager->bo, &sa_manager->gpu_addr,
+ &sa_manager->cpu_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;
}
- memset(sa_manager->cpu_ptr, 0, sa_manager->size);
+ memset(sa_manager->cpu_ptr, 0, size);
+ drm_suballoc_manager_init(&sa_manager->base, size, suballoc_align);
return r;
}
void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager)
{
- struct amdgpu_sa_bo *sa_bo, *tmp;
-
if (sa_manager->bo == NULL) {
dev_err(adev->dev, "no bo for sa manager\n");
return;
}
- if (!list_empty(&sa_manager->olist)) {
- sa_manager->hole = &sa_manager->olist,
- amdgpu_sa_bo_try_free(sa_manager);
- if (!list_empty(&sa_manager->olist)) {
- dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
- }
- }
- list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
- amdgpu_sa_bo_remove_locked(sa_bo);
- }
+ drm_suballoc_manager_fini(&sa_manager->base);
amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
- sa_manager->size = 0;
}
-static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
-{
- struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
- if (sa_manager->hole == &sa_bo->olist) {
- sa_manager->hole = sa_bo->olist.prev;
- }
- list_del_init(&sa_bo->olist);
- list_del_init(&sa_bo->flist);
- dma_fence_put(sa_bo->fence);
- kfree(sa_bo);
-}
-
-static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size)
{
- struct amdgpu_sa_bo *sa_bo, *tmp;
+ struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
+ GFP_KERNEL, false, 0);
- if (sa_manager->hole->next == &sa_manager->olist)
- return;
+ if (IS_ERR(sa)) {
+ *sa_bo = NULL;
- sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
- list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
- if (sa_bo->fence == NULL ||
- !dma_fence_is_signaled(sa_bo->fence)) {
- return;
- }
- amdgpu_sa_bo_remove_locked(sa_bo);
+ return PTR_ERR(sa);
}
-}
-static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole != &sa_manager->olist) {
- return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
- }
+ *sa_bo = sa;
return 0;
}
-static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole->next != &sa_manager->olist) {
- return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
- }
- return sa_manager->size;
-}
-
-static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo *sa_bo,
- unsigned size, unsigned align)
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence)
{
- unsigned soffset, eoffset, wasted;
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- soffset += wasted;
-
- sa_bo->manager = sa_manager;
- sa_bo->soffset = soffset;
- sa_bo->eoffset = soffset + size;
- list_add(&sa_bo->olist, sa_manager->hole);
- INIT_LIST_HEAD(&sa_bo->flist);
- sa_manager->hole = &sa_bo->olist;
- return true;
- }
- return false;
-}
-
-/**
- * amdgpu_sa_event - Check if we can stop waiting
- *
- * @sa_manager: pointer to the sa_manager
- * @size: number of bytes we want to allocate
- * @align: alignment we need to match
- *
- * Check if either there is a fence we can wait for or
- * enough free memory to satisfy the allocation directly
- */
-static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
- unsigned size, unsigned align)
-{
- unsigned soffset, eoffset, wasted;
- int i;
-
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- if (!list_empty(&sa_manager->flist[i]))
- return true;
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- return true;
- }
-
- return false;
-}
-
-static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
- struct dma_fence **fences,
- unsigned *tries)
-{
- struct amdgpu_sa_bo *best_bo = NULL;
- unsigned i, soffset, best, tmp;
-
- /* if hole points to the end of the buffer */
- if (sa_manager->hole->next == &sa_manager->olist) {
- /* try again with its beginning */
- sa_manager->hole = &sa_manager->olist;
- return true;
- }
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- /* to handle wrap around we add sa_manager->size */
- best = sa_manager->size * 2;
- /* go over all fence list and try to find the closest sa_bo
- * of the current last
- */
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
- struct amdgpu_sa_bo *sa_bo;
-
- fences[i] = NULL;
-
- if (list_empty(&sa_manager->flist[i]))
- continue;
-
- sa_bo = list_first_entry(&sa_manager->flist[i],
- struct amdgpu_sa_bo, flist);
-
- if (!dma_fence_is_signaled(sa_bo->fence)) {
- fences[i] = sa_bo->fence;
- continue;
- }
-
- /* limit the number of tries each ring gets */
- if (tries[i] > 2) {
- continue;
- }
-
- tmp = sa_bo->soffset;
- if (tmp < soffset) {
- /* wrap around, pretend it's after */
- tmp += sa_manager->size;
- }
- tmp -= soffset;
- if (tmp < best) {
- /* this sa bo is the closest one */
- best = tmp;
- best_bo = sa_bo;
- }
- }
-
- if (best_bo) {
- uint32_t idx = best_bo->fence->context;
-
- idx %= AMDGPU_SA_NUM_FENCE_LISTS;
- ++tries[idx];
- sa_manager->hole = best_bo->olist.prev;
-
- /* we knew that this one is signaled,
- so it's save to remote it */
- amdgpu_sa_bo_remove_locked(best_bo);
- return true;
- }
- return false;
-}
-
-int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align)
-{
- struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
- unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
- unsigned count;
- int i, r;
- signed long t;
-
- if (WARN_ON_ONCE(align > sa_manager->align))
- return -EINVAL;
-
- if (WARN_ON_ONCE(size > sa_manager->size))
- return -EINVAL;
-
- *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
- if (!(*sa_bo))
- return -ENOMEM;
- (*sa_bo)->manager = sa_manager;
- (*sa_bo)->fence = NULL;
- INIT_LIST_HEAD(&(*sa_bo)->olist);
- INIT_LIST_HEAD(&(*sa_bo)->flist);
-
- spin_lock(&sa_manager->wq.lock);
- do {
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- tries[i] = 0;
-
- do {
- amdgpu_sa_bo_try_free(sa_manager);
-
- if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
- size, align)) {
- spin_unlock(&sa_manager->wq.lock);
- return 0;
- }
-
- /* see if we can skip over some allocations */
- } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
-
- for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- if (fences[i])
- fences[count++] = dma_fence_get(fences[i]);
-
- if (count) {
- spin_unlock(&sa_manager->wq.lock);
- t = dma_fence_wait_any_timeout(fences, count, false,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
- for (i = 0; i < count; ++i)
- dma_fence_put(fences[i]);
-
- r = (t > 0) ? 0 : t;
- spin_lock(&sa_manager->wq.lock);
- } else {
- /* if we have nothing to wait for block */
- r = wait_event_interruptible_locked(
- sa_manager->wq,
- amdgpu_sa_event(sa_manager, size, align)
- );
- }
-
- } while (!r);
-
- spin_unlock(&sa_manager->wq.lock);
- kfree(*sa_bo);
- *sa_bo = NULL;
- return r;
-}
-
-void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
- struct dma_fence *fence)
-{
- struct amdgpu_sa_manager *sa_manager;
-
if (sa_bo == NULL || *sa_bo == NULL) {
return;
}
- sa_manager = (*sa_bo)->manager;
- spin_lock(&sa_manager->wq.lock);
- if (fence && !dma_fence_is_signaled(fence)) {
- uint32_t idx;
-
- (*sa_bo)->fence = dma_fence_get(fence);
- idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
- list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
- } else {
- amdgpu_sa_bo_remove_locked(*sa_bo);
- }
- wake_up_all_locked(&sa_manager->wq);
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_free(*sa_bo, fence);
*sa_bo = NULL;
}
@@ -373,26 +108,8 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m)
{
- struct amdgpu_sa_bo *i;
-
- spin_lock(&sa_manager->wq.lock);
- list_for_each_entry(i, &sa_manager->olist, olist) {
- uint64_t soffset = i->soffset + sa_manager->gpu_addr;
- uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
- if (&i->olist == sa_manager->hole) {
- seq_printf(m, ">");
- } else {
- seq_printf(m, " ");
- }
- seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
- soffset, eoffset, eoffset - soffset);
+ struct drm_printer p = drm_seq_file_printer(m);
- if (i->fence)
- seq_printf(m, " protected by 0x%016llx on context %llu",
- i->fence->seqno, i->fence->context);
-
- seq_printf(m, "\n");
- }
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index b7d861ed5284..341beec59537 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -22,7 +22,6 @@
* Authors: Andres Rodriguez <andresx7@gmail.com>
*/
-#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/pid.h>
@@ -32,90 +31,57 @@
#include "amdgpu_sched.h"
#include "amdgpu_vm.h"
-int amdgpu_to_sched_priority(int amdgpu_priority,
- enum drm_sched_priority *prio)
-{
- switch (amdgpu_priority) {
- case AMDGPU_CTX_PRIORITY_VERY_HIGH:
- *prio = DRM_SCHED_PRIORITY_HIGH;
- break;
- case AMDGPU_CTX_PRIORITY_HIGH:
- *prio = DRM_SCHED_PRIORITY_HIGH;
- break;
- case AMDGPU_CTX_PRIORITY_NORMAL:
- *prio = DRM_SCHED_PRIORITY_NORMAL;
- break;
- case AMDGPU_CTX_PRIORITY_LOW:
- case AMDGPU_CTX_PRIORITY_VERY_LOW:
- *prio = DRM_SCHED_PRIORITY_MIN;
- break;
- case AMDGPU_CTX_PRIORITY_UNSET:
- *prio = DRM_SCHED_PRIORITY_UNSET;
- break;
- default:
- WARN(1, "Invalid context priority %d\n", amdgpu_priority);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
int fd,
- enum drm_sched_priority priority)
+ int32_t priority)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ctx_mgr *mgr;
struct amdgpu_ctx *ctx;
uint32_t id;
int r;
- if (!f.file)
+ if (fd_empty(f))
return -EINVAL;
- r = amdgpu_file_to_fpriv(f.file, &fpriv);
- if (r) {
- fdput(f);
+ r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
+ if (r)
return r;
- }
- idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ idr_for_each_entry(&mgr->ctx_handles, ctx, id)
amdgpu_ctx_priority_override(ctx, priority);
+ mutex_unlock(&mgr->lock);
- fdput(f);
return 0;
}
static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
int fd,
unsigned ctx_id,
- enum drm_sched_priority priority)
+ int32_t priority)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
int r;
- if (!f.file)
+ if (fd_empty(f))
return -EINVAL;
- r = amdgpu_file_to_fpriv(f.file, &fpriv);
- if (r) {
- fdput(f);
+ r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
+ if (r)
return r;
- }
ctx = amdgpu_ctx_get(fpriv, ctx_id);
- if (!ctx) {
- fdput(f);
+ if (!ctx)
return -EINVAL;
- }
amdgpu_ctx_priority_override(ctx, priority);
amdgpu_ctx_put(ctx);
- fdput(f);
-
return 0;
}
@@ -124,7 +90,6 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
{
union drm_amdgpu_sched *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
- enum drm_sched_priority priority;
int r;
/* First check the op, then the op's argument.
@@ -138,21 +103,22 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- r = amdgpu_to_sched_priority(args->in.priority, &priority);
- if (r)
- return r;
+ if (!amdgpu_ctx_priority_is_valid(args->in.priority)) {
+ WARN(1, "Invalid context priority %d\n", args->in.priority);
+ return -EINVAL;
+ }
switch (args->in.op) {
case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
r = amdgpu_sched_process_priority_override(adev,
args->in.fd,
- priority);
+ args->in.priority);
break;
case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
r = amdgpu_sched_context_priority_override(adev,
args->in.fd,
args->in.ctx_id,
- priority);
+ args->in.priority);
break;
default:
/* Impossible.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index de91d29c9d96..8b8a04138711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -21,9 +21,13 @@
*
*/
+#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
#define AMDGPU_CSA_SDMA_SIZE 64
/* SDMA CSA reside in the 3rd page of CSA */
@@ -63,7 +67,7 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
}
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
- unsigned vmid)
+ unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
uint64_t csa_mc_addr;
@@ -71,7 +75,7 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
int r;
/* don't enable OS preemption on SDMA under SRIOV */
- if (amdgpu_sriov_vf(adev) || vmid == 0 || !amdgpu_mcbp)
+ if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
return 0;
r = amdgpu_sdma_get_index_from_ring(ring, &index);
@@ -87,79 +91,39 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
}
int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
- void *ras_ih_info)
+ struct ras_common_if *ras_block)
{
int r, i;
- struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info;
- struct ras_fs_if fs_info = {
- .sysfs_name = "sdma_err_count",
- };
-
- if (!ih_info)
- return -EINVAL;
-
- if (!adev->sdma.ras_if) {
- adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->sdma.ras_if)
- return -ENOMEM;
- adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA;
- adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->sdma.ras_if->sub_block_index = 0;
- strcpy(adev->sdma.ras_if->name, "sdma");
- }
- fs_info.head = ih_info->head = *adev->sdma.ras_if;
- r = amdgpu_ras_late_init(adev, adev->sdma.ras_if,
- &fs_info, ih_info);
+ r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
- goto free;
+ return r;
- if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) {
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i);
if (r)
goto late_fini;
}
- } else {
- r = 0;
- goto free;
}
return 0;
late_fini:
- amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info);
-free:
- kfree(adev->sdma.ras_if);
- adev->sdma.ras_if = NULL;
+ amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
-void amdgpu_sdma_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
- adev->sdma.ras_if) {
- struct ras_common_if *ras_if = adev->sdma.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- /* the cb member will not be used by
- * amdgpu_ras_interrupt_remove_handler, init it only
- * to cheat the check in ras_late_fini
- */
- .cb = amdgpu_sdma_process_ras_data_cb,
- };
-
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
-}
-
int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry)
{
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+ if (amdgpu_sriov_vf(adev))
+ return AMDGPU_RAS_SUCCESS;
+
amdgpu_ras_reset_gpu(adev);
return AMDGPU_RAS_SUCCESS;
@@ -182,3 +146,466 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
+
+static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
+{
+ uint16_t version_major;
+ const struct common_firmware_header *header = NULL;
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const struct sdma_firmware_header_v2_0 *hdr_v2;
+ const struct sdma_firmware_header_v3_0 *hdr_v3;
+
+ header = (const struct common_firmware_header *)
+ sdma_inst->fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ switch (version_major) {
+ case 1:
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ break;
+ case 2:
+ hdr_v2 = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version);
+ break;
+ case 3:
+ hdr_v3 = (const struct sdma_firmware_header_v3_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v3->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v3->ucode_feature_version);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (sdma_inst->feature_version >= 20)
+ sdma_inst->burst_nop = true;
+
+ return 0;
+}
+
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+ if (duplicate)
+ break;
+ }
+
+ memset((void *)adev->sdma.instance, 0,
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
+}
+
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
+ u32 instance, bool duplicate)
+{
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ int err, i;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr;
+ const struct sdma_firmware_header_v3_0 *sdma_hv3;
+ uint16_t version_major;
+ char ucode_prefix[30];
+
+ amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ if (instance == 0)
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s%d.bin", ucode_prefix, instance);
+ if (err)
+ goto out;
+
+ header = (const struct common_firmware_header *)
+ adev->sdma.instance[instance].fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ if ((duplicate && instance) || (!duplicate && version_major > 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = amdgpu_sdma_init_inst_ctx(&adev->sdma.instance[instance]);
+ if (err)
+ goto out;
+
+ if (duplicate) {
+ for (i = 1; i < adev->sdma.num_instances; i++)
+ memcpy((void *)&adev->sdma.instance[i],
+ (void *)&adev->sdma.instance[0],
+ sizeof(struct amdgpu_sdma_instance));
+ }
+
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ switch (version_major) {
+ case 1:
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!duplicate && (instance != i))
+ continue;
+ else {
+ /* Use a single copy per SDMA firmware type. PSP uses the same instance for all
+ * groups of SDMAs */
+ if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 5)) &&
+ adev->firmware.load_type ==
+ AMDGPU_FW_LOAD_PSP &&
+ adev->sdma.num_inst_per_aid == i) {
+ break;
+ }
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+ break;
+ case 2:
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+ break;
+ case 3:
+ sdma_hv3 = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_RS64];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_RS64;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hv3->ucode_size_bytes), PAGE_SIZE);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ }
+
+out:
+ if (err)
+ amdgpu_sdma_destroy_inst_ctx(adev, duplicate);
+ return err;
+}
+
+int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err = 0;
+ struct amdgpu_sdma_ras *ras = NULL;
+
+ /* adev->sdma.ras is NULL, which means sdma does not
+ * support ras function, then do nothing here.
+ */
+ if (!adev->sdma.ras)
+ return 0;
+
+ ras = adev->sdma.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register sdma ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "sdma");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__SDMA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->sdma.ras_if = &ras->ras_block.ras_comm;
+
+ /* If not define special ras_late_init function, use default ras_late_init */
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init;
+
+ /* If not defined special ras_cb function, use default ras_cb */
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb;
+
+ return 0;
+}
+
+/*
+ * debugfs for to enable/disable sdma job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u64 i, num_ring;
+ u64 mask = 0;
+ struct amdgpu_ring *ring, *page = NULL;
+
+ if (!adev)
+ return -ENODEV;
+
+ /* Determine the number of rings per SDMA instance
+ * (1 for sdma gfx ring, 2 if page queue exists)
+ */
+ if (adev->sdma.has_page_queue)
+ num_ring = 2;
+ else
+ num_ring = 1;
+
+ /* Calculate the maximum possible mask value
+ * based on the number of SDMA instances and rings
+ */
+ mask = BIT_ULL(adev->sdma.num_instances * num_ring) - 1;
+
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->sdma.num_instances; ++i) {
+ ring = &adev->sdma.instance[i].ring;
+ if (adev->sdma.has_page_queue)
+ page = &adev->sdma.instance[i].page;
+ if (val & BIT_ULL(i * num_ring))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+
+ if (page) {
+ if (val & BIT_ULL(i * num_ring + 1))
+ page->sched.ready = true;
+ else
+ page->sched.ready = false;
+ }
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u64 i, num_ring;
+ u64 mask = 0;
+ struct amdgpu_ring *ring, *page = NULL;
+
+ if (!adev)
+ return -ENODEV;
+
+ /* Determine the number of rings per SDMA instance
+ * (1 for sdma gfx ring, 2 if page queue exists)
+ */
+ if (adev->sdma.has_page_queue)
+ num_ring = 2;
+ else
+ num_ring = 1;
+
+ for (i = 0; i < adev->sdma.num_instances; ++i) {
+ ring = &adev->sdma.instance[i].ring;
+ if (adev->sdma.has_page_queue)
+ page = &adev->sdma.instance[i].page;
+
+ if (ring->sched.ready)
+ mask |= BIT_ULL(i * num_ring);
+ else
+ mask &= ~BIT_ULL(i * num_ring);
+
+ if (page) {
+ if (page->sched.ready)
+ mask |= BIT_ULL(i * num_ring + 1);
+ else
+ mask &= ~BIT_ULL(i * num_ring + 1);
+ }
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_sdma_sched_mask_fops,
+ amdgpu_debugfs_sdma_sched_mask_get,
+ amdgpu_debugfs_sdma_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->sdma.num_instances > 1))
+ return;
+ sprintf(name, "amdgpu_sdma_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_sdma_sched_mask_fops);
+#endif
+}
+
+static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->sdma.supported_reset);
+}
+
+static DEVICE_ATTR(sdma_reset_mask, 0444,
+ amdgpu_get_sdma_reset_mask, NULL);
+
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (!amdgpu_gpu_recovery)
+ return r;
+
+ if (adev->sdma.num_instances) {
+ r = device_create_file(adev->dev, &dev_attr_sdma_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->dev->kobj.sd) {
+ if (adev->sdma.num_instances)
+ device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
+ }
+}
+
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->sdma.has_page_queue &&
+ (ring->me < adev->sdma.num_instances) &&
+ (ring == &adev->sdma.instance[ring->me].ring))
+ return &adev->sdma.instance[ring->me].page;
+ else
+ return NULL;
+}
+
+/**
+* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine
+* @adev: Pointer to the AMDGPU device structure
+* @ring: Pointer to the ring structure to check
+*
+* This function checks if the given ring is an SDMA ring that shares a VM invalidation engine.
+* It returns true if the ring is such an SDMA ring, false otherwise.
+*/
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ int i = ring->me;
+
+ if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
+ return false;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ return (ring == &adev->sdma.instance[i].page);
+ else
+ return false;
+}
+
+static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
+{
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+
+ if (sdma_instance->funcs->soft_reset_kernel_queue)
+ return sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id);
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * amdgpu_sdma_reset_engine - Reset a specific SDMA engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: Logical ID of the SDMA engine instance to reset
+ * @caller_handles_kernel_queues: Skip kernel queue processing. Caller
+ * will handle it.
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
+ bool caller_handles_kernel_queues)
+{
+ int ret = 0;
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+ struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+ struct amdgpu_ring *page_ring = &sdma_instance->page;
+
+ mutex_lock(&sdma_instance->engine_reset_mutex);
+
+ if (!caller_handles_kernel_queues) {
+ /* Stop the scheduler's work queue for the GFX and page rings if they are running.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ drm_sched_wqueue_stop(&gfx_ring->sched);
+
+ if (adev->sdma.has_page_queue)
+ drm_sched_wqueue_stop(&page_ring->sched);
+ }
+
+ if (sdma_instance->funcs->stop_kernel_queue) {
+ sdma_instance->funcs->stop_kernel_queue(gfx_ring);
+ if (adev->sdma.has_page_queue)
+ sdma_instance->funcs->stop_kernel_queue(page_ring);
+ }
+
+ /* Perform the SDMA reset for the specified instance */
+ ret = amdgpu_sdma_soft_reset(adev, instance_id);
+ if (ret) {
+ dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id);
+ goto exit;
+ }
+
+ if (sdma_instance->funcs->start_kernel_queue) {
+ sdma_instance->funcs->start_kernel_queue(gfx_ring);
+ if (adev->sdma.has_page_queue)
+ sdma_instance->funcs->start_kernel_queue(page_ring);
+ }
+
+exit:
+ if (!caller_handles_kernel_queues) {
+ /* Restart the scheduler's work queue for the GFX and page rings
+ * if they were stopped by this function. This allows new tasks
+ * to be submitted to the queues after the reset is complete.
+ */
+ if (!ret) {
+ amdgpu_fence_driver_force_completion(gfx_ring);
+ drm_sched_wqueue_start(&gfx_ring->sched);
+ if (adev->sdma.has_page_queue) {
+ amdgpu_fence_driver_force_completion(page_ring);
+ drm_sched_wqueue_start(&page_ring->sched);
+ }
+ }
+ }
+ mutex_unlock(&sdma_instance->engine_reset_mutex);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index e5b8fb8e75c5..34311f32be4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -23,9 +23,10 @@
#ifndef __AMDGPU_SDMA_H__
#define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
/* max number of IP instances */
-#define AMDGPU_MAX_SDMA_INSTANCES 8
+#define AMDGPU_MAX_SDMA_INSTANCES 16
enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
@@ -36,9 +37,25 @@ enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE5,
AMDGPU_SDMA_IRQ_INSTANCE6,
AMDGPU_SDMA_IRQ_INSTANCE7,
+ AMDGPU_SDMA_IRQ_INSTANCE8,
+ AMDGPU_SDMA_IRQ_INSTANCE9,
+ AMDGPU_SDMA_IRQ_INSTANCE10,
+ AMDGPU_SDMA_IRQ_INSTANCE11,
+ AMDGPU_SDMA_IRQ_INSTANCE12,
+ AMDGPU_SDMA_IRQ_INSTANCE13,
+ AMDGPU_SDMA_IRQ_INSTANCE14,
+ AMDGPU_SDMA_IRQ_INSTANCE15,
AMDGPU_SDMA_IRQ_LAST
};
+#define NUM_SDMA(x) hweight32(x)
+
+struct amdgpu_sdma_funcs {
+ int (*stop_kernel_queue)(struct amdgpu_ring *ring);
+ int (*start_kernel_queue)(struct amdgpu_ring *ring);
+ int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id);
+};
+
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
@@ -48,27 +65,74 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring ring;
struct amdgpu_ring page;
bool burst_nop;
+ uint32_t aid_id;
+
+ struct amdgpu_bo *sdma_fw_obj;
+ uint64_t sdma_fw_gpu_addr;
+ uint32_t *sdma_fw_ptr;
+ struct mutex engine_reset_mutex;
+ /* track guilty state of GFX and PAGE queues */
+ bool gfx_guilty;
+ bool page_guilty;
+ const struct amdgpu_sdma_funcs *funcs;
};
-struct amdgpu_sdma_ras_funcs {
- int (*ras_late_init)(struct amdgpu_device *adev,
- void *ras_ih_info);
- void (*ras_fini)(struct amdgpu_device *adev);
- int (*query_ras_error_count)(struct amdgpu_device *adev,
- uint32_t instance, void *ras_error_status);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
+enum amdgpu_sdma_ras_memory_id {
+ AMDGPU_SDMA_MBANK_DATA_BUF0 = 1,
+ AMDGPU_SDMA_MBANK_DATA_BUF1 = 2,
+ AMDGPU_SDMA_MBANK_DATA_BUF2 = 3,
+ AMDGPU_SDMA_MBANK_DATA_BUF3 = 4,
+ AMDGPU_SDMA_MBANK_DATA_BUF4 = 5,
+ AMDGPU_SDMA_MBANK_DATA_BUF5 = 6,
+ AMDGPU_SDMA_MBANK_DATA_BUF6 = 7,
+ AMDGPU_SDMA_MBANK_DATA_BUF7 = 8,
+ AMDGPU_SDMA_MBANK_DATA_BUF8 = 9,
+ AMDGPU_SDMA_MBANK_DATA_BUF9 = 10,
+ AMDGPU_SDMA_MBANK_DATA_BUF10 = 11,
+ AMDGPU_SDMA_MBANK_DATA_BUF11 = 12,
+ AMDGPU_SDMA_MBANK_DATA_BUF12 = 13,
+ AMDGPU_SDMA_MBANK_DATA_BUF13 = 14,
+ AMDGPU_SDMA_MBANK_DATA_BUF14 = 15,
+ AMDGPU_SDMA_MBANK_DATA_BUF15 = 16,
+ AMDGPU_SDMA_UCODE_BUF = 17,
+ AMDGPU_SDMA_RB_CMD_BUF = 18,
+ AMDGPU_SDMA_IB_CMD_BUF = 19,
+ AMDGPU_SDMA_UTCL1_RD_FIFO = 20,
+ AMDGPU_SDMA_UTCL1_RDBST_FIFO = 21,
+ AMDGPU_SDMA_UTCL1_WR_FIFO = 22,
+ AMDGPU_SDMA_DATA_LUT_FIFO = 23,
+ AMDGPU_SDMA_SPLIT_DAT_BUF = 24,
+ AMDGPU_SDMA_MEMORY_BLOCK_LAST,
+};
+
+struct amdgpu_sdma_ras {
+ struct amdgpu_ras_block_object ras_block;
};
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
+ struct amdgpu_irq_src fence_irq;
struct amdgpu_irq_src ecc_irq;
+ struct amdgpu_irq_src vm_hole_irq;
+ struct amdgpu_irq_src doorbell_invalid_irq;
+ struct amdgpu_irq_src pool_timeout_irq;
+ struct amdgpu_irq_src srbm_write_irq;
+ struct amdgpu_irq_src ctxt_empty_irq;
+
int num_instances;
+ uint32_t sdma_mask;
+ int num_inst_per_aid;
uint32_t srbm_soft_reset;
bool has_page_queue;
struct ras_common_if *ras_if;
- const struct amdgpu_sdma_ras_funcs *funcs;
+ struct amdgpu_sdma_ras *ras;
+ uint32_t *ip_dump;
+ uint32_t supported_reset;
+ struct list_head reset_callback_list;
+ bool no_user_submission;
+ bool disable_uq;
};
/*
@@ -90,7 +154,7 @@ struct amdgpu_buffer_funcs {
uint64_t dst_offset,
/* number of byte to transfer */
uint32_t byte_count,
- bool tmz);
+ uint32_t copy_flags);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
@@ -108,6 +172,9 @@ struct amdgpu_buffer_funcs {
uint32_t byte_count);
};
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
+ bool caller_handles_kernel_queues);
+
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
@@ -116,12 +183,22 @@ amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
- void *ras_ih_info);
-void amdgpu_sdma_ras_fini(struct amdgpu_device *adev);
+ struct ras_common_if *ras_block);
int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+ bool duplicate);
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate);
+int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 834440ab9ff7..3739be1b71e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -69,32 +69,35 @@ void psp_securedisplay_parse_resp_status(struct psp_context *psp,
case TA_SECUREDISPLAY_STATUS__READ_CRC_ERROR:
dev_err(psp->adev->dev, "Secure display: Failed to Read CRC");
break;
+ case TA_SECUREDISPLAY_STATUS__I2C_INIT_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to initialize I2C.");
+ break;
default:
dev_err(psp->adev->dev, "Secure display: Failed to parse status: %d\n", status);
}
}
-void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct securedisplay_cmd **cmd,
+void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct ta_securedisplay_cmd **cmd,
enum ta_securedisplay_command command_id)
{
- *cmd = (struct securedisplay_cmd *)psp->securedisplay_context.securedisplay_shared_buf;
- memset(*cmd, 0, sizeof(struct securedisplay_cmd));
+ *cmd = (struct ta_securedisplay_cmd *)psp->securedisplay_context.context.mem_context.shared_buf;
+ memset(*cmd, 0, sizeof(struct ta_securedisplay_cmd));
(*cmd)->status = TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE;
(*cmd)->cmd_id = command_id;
}
+#if defined(CONFIG_DEBUG_FS)
+
static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
struct psp_context *psp = &adev->psp;
- struct securedisplay_cmd *securedisplay_cmd;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
struct drm_device *dev = adev_to_drm(adev);
uint32_t phy_id;
uint32_t op;
- int i;
char str[64];
- char i2c_output[256];
int ret;
if (*pos || size > sizeof(str) - 1)
@@ -118,6 +121,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
switch (op) {
case 1:
+ mutex_lock(&psp->securedisplay_context.mutex);
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
TA_SECUREDISPLAY_COMMAND__QUERY_TA);
ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
@@ -128,29 +132,33 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
else
psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
}
+ mutex_unlock(&psp->securedisplay_context.mutex);
break;
case 2:
+ if (size < 3 || phy_id >= TA_SECUREDISPLAY_MAX_PHY) {
+ dev_err(adev->dev, "Invalid input: %s\n", str);
+ return -EINVAL;
+ }
+ mutex_lock(&psp->securedisplay_context.mutex);
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_id;
ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
if (!ret) {
if (securedisplay_cmd->status == TA_SECUREDISPLAY_STATUS__SUCCESS) {
- memset(i2c_output, 0, sizeof(i2c_output));
- for (i = 0; i < TA_SECUREDISPLAY_I2C_BUFFER_SIZE; i++)
- sprintf(i2c_output, "%s 0x%X", i2c_output,
- securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf[i]);
- dev_info(adev->dev, "SECUREDISPLAY: I2C buffer out put is :%s\n", i2c_output);
+ dev_info(adev->dev, "SECUREDISPLAY: I2C buffer out put is: %*ph\n",
+ TA_SECUREDISPLAY_I2C_BUFFER_SIZE,
+ securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf);
} else {
psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
}
}
+ mutex_unlock(&psp->securedisplay_context.mutex);
break;
default:
dev_err(adev->dev, "Invalid input: %s\n", str);
}
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return size;
@@ -163,11 +171,13 @@ static const struct file_operations amdgpu_securedisplay_debugfs_ops = {
.llseek = default_llseek
};
+#endif
+
void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
- if (!adev->psp.securedisplay_context.securedisplay_initialized)
+ if (!adev->psp.securedisplay_context.context.initialized)
return;
debugfs_create_file("securedisplay_test", S_IWUSR, adev_to_drm(adev)->primary->debugfs_root,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
index fe98574748f4..456ad68ed4b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
@@ -30,7 +30,7 @@
void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev);
void psp_securedisplay_parse_resp_status(struct psp_context *psp,
enum ta_securedisplay_status status);
-void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct securedisplay_cmd **cmd,
+void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct ta_securedisplay_cmd **cmd,
enum ta_securedisplay_command command_id);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
new file mode 100644
index 000000000000..a0b479d5fff1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_seq64.h"
+
+#include <drm/drm_exec.h>
+
+/**
+ * DOC: amdgpu_seq64
+ *
+ * amdgpu_seq64 allocates a 64bit memory on each request in sequence order.
+ * seq64 driver is required for user queue fence memory allocation, TLB
+ * counters and VM updates. It has maximum count of 32768 64 bit slots.
+ */
+
+/**
+ * amdgpu_seq64_get_va_base - Get the seq64 va base address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns:
+ * va base address on success
+ */
+static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
+{
+ u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev);
+
+ addr = amdgpu_gmc_sign_extend(addr);
+
+ return addr;
+}
+
+/**
+ * amdgpu_seq64_map - Map the seq64 memory to VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm pointer
+ * @bo_va: bo_va pointer
+ *
+ * Map the seq64 memory to the given VM.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va)
+{
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ u64 seq64_addr;
+ int r;
+
+ bo = adev->seq64.sbo;
+ if (!bo)
+ return -EINVAL;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+ if (!*bo_va) {
+ r = -ENOMEM;
+ goto error;
+ }
+
+ seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0,
+ AMDGPU_VA_RESERVED_SEQ64_SIZE,
+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
+ if (r) {
+ DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
+ }
+
+ r = amdgpu_vm_bo_update(adev, *bo_va, false);
+ if (r) {
+ DRM_ERROR("failed to do vm_bo_update on userq sem\n");
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
+ }
+
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
+
+/**
+ * amdgpu_seq64_unmap - Unmap the seq64 memory
+ *
+ * @adev: amdgpu_device pointer
+ * @fpriv: DRM file private
+ *
+ * Unmap the seq64 memory from the given VM.
+ */
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
+{
+ struct amdgpu_vm *vm;
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ int r;
+
+ if (!fpriv->seq64_va)
+ return;
+
+ bo = adev->seq64.sbo;
+ if (!bo)
+ return;
+
+ vm = &fpriv->vm;
+
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ amdgpu_vm_bo_del(adev, fpriv->seq64_va);
+
+ fpriv->seq64_va = NULL;
+
+error:
+ drm_exec_fini(&exec);
+}
+
+/**
+ * amdgpu_seq64_alloc - Allocate a 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @va: VA to access the seq in process address space
+ * @gpu_addr: GPU address to access the seq
+ * @cpu_addr: CPU address to access the seq
+ *
+ * Alloc a 64 bit memory from seq64 pool.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va,
+ u64 *gpu_addr, u64 **cpu_addr)
+{
+ unsigned long bit_pos;
+
+ bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
+ if (bit_pos >= adev->seq64.num_sem)
+ return -ENOSPC;
+
+ __set_bit(bit_pos, adev->seq64.used);
+
+ *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev);
+
+ if (gpu_addr)
+ *gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr;
+
+ *cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
+
+ return 0;
+}
+
+/**
+ * amdgpu_seq64_free - Free the given 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @va: gpu start address to be freed
+ *
+ * Free the given 64 bit memory from seq64 pool.
+ */
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
+{
+ unsigned long bit_pos;
+
+ bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);
+ if (bit_pos < adev->seq64.num_sem)
+ __clear_bit(bit_pos, adev->seq64.used);
+}
+
+/**
+ * amdgpu_seq64_fini - Cleanup seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the memory space allocated for seq64.
+ *
+ */
+void amdgpu_seq64_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->seq64.sbo,
+ NULL,
+ (void **)&adev->seq64.cpu_base_addr);
+}
+
+/**
+ * amdgpu_seq64_init - Initialize seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the required memory space for seq64.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->seq64.sbo)
+ return 0;
+
+ /*
+ * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS
+ * 64bit slots
+ */
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->seq64.sbo, &adev->seq64.gpu_addr,
+ (void **)&adev->seq64.cpu_base_addr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
+ return r;
+ }
+
+ memset(adev->seq64.cpu_base_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE);
+
+ adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS;
+ memset(&adev->seq64.used, 0, sizeof(adev->seq64.used));
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
new file mode 100644
index 000000000000..26a249aaaee1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SEQ64_H__
+#define __AMDGPU_SEQ64_H__
+
+#include "amdgpu_vm.h"
+
+#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_VA_RESERVED_SEQ64_SIZE / sizeof(u64))
+
+struct amdgpu_seq64 {
+ struct amdgpu_bo *sbo;
+ u32 num_sem;
+ u64 gpu_addr;
+ u64 *cpu_base_addr;
+ DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
+};
+
+void amdgpu_seq64_fini(struct amdgpu_device *adev);
+int amdgpu_seq64_init(struct amdgpu_device *adev);
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr);
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va);
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index 03009157aec8..ec9d12f85f39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -23,11 +23,28 @@
#ifndef __AMDGPU_SMUIO_H__
#define __AMDGPU_SMUIO_H__
+enum amdgpu_pkg_type {
+ AMDGPU_PKG_TYPE_APU = 2,
+ AMDGPU_PKG_TYPE_CEM = 3,
+ AMDGPU_PKG_TYPE_OAM = 4,
+ AMDGPU_PKG_TYPE_UNKNOWN,
+};
+
+struct amdgpu_smuio_mcm_config_info {
+ int socket_id;
+ int die_id;
+};
+
struct amdgpu_smuio_funcs {
u32 (*get_rom_index_offset)(struct amdgpu_device *adev);
u32 (*get_rom_data_offset)(struct amdgpu_device *adev);
void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
- void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
+ u32 (*get_die_id)(struct amdgpu_device *adev);
+ u32 (*get_socket_id)(struct amdgpu_device *adev);
+ enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
+ bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
+ u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev);
};
struct amdgpu_smuio {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 4e558632a5d2..d6ae9974c952 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014 Advanced Micro Devices, Inc.
* All Rights Reserved.
@@ -28,6 +29,8 @@
* Christian König <christian.koenig@amd.com>
*/
+#include <linux/dma-fence-chain.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
@@ -49,7 +52,6 @@ static struct kmem_cache *amdgpu_sync_slab;
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
hash_init(sync->fences);
- sync->last_vm_update = NULL;
}
/**
@@ -133,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
struct amdgpu_sync_entry *e;
hash_for_each_possible(sync->fences, e, node, f->context) {
- if (unlikely(e->fence->context != f->context))
- continue;
+ if (dma_fence_is_signaled(e->fence)) {
+ dma_fence_put(e->fence);
+ e->fence = dma_fence_get(f);
+ return true;
+ }
- amdgpu_sync_keep_later(&e->fence, f);
- return true;
+ if (likely(e->fence->context == f->context)) {
+ amdgpu_sync_keep_later(&e->fence, f);
+ return true;
+ }
}
return false;
}
@@ -147,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
*
* @sync: sync object to add fence to
* @f: fence to sync to
+ * @flags: memory allocation flags to use when allocating sync entry
*
* Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags)
{
struct amdgpu_sync_entry *e;
@@ -160,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
if (amdgpu_sync_add_later(sync, f))
return 0;
- e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+ e = kmem_cache_alloc(amdgpu_sync_slab, flags);
if (!e)
return -ENOMEM;
@@ -169,21 +178,54 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
return 0;
}
-/**
- * amdgpu_sync_vm_fence - remember to sync to this VM fence
- *
- * @sync: sync object to add fence to
- * @fence: the VM fence to add
- *
- * Add the fence to the sync object and remember it as VM update.
- */
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
+/* Determine based on the owner and mode if we should sync to a fence or not */
+static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
+ enum amdgpu_sync_mode mode,
+ void *owner, struct dma_fence *f)
{
- if (!fence)
- return 0;
+ void *fence_owner = amdgpu_sync_get_owner(f);
- amdgpu_sync_keep_later(&sync->last_vm_update, fence);
- return amdgpu_sync_fence(sync, fence);
+ /* Always sync to moves, no matter what */
+ if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
+ return true;
+
+ /* We only want to trigger KFD eviction fences on
+ * evict or move jobs. Skip KFD fences otherwise.
+ */
+ if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ return false;
+
+ /* Never sync to VM updates either. */
+ if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
+ owner != AMDGPU_FENCE_OWNER_KFD)
+ return false;
+
+ /* Ignore fences depending on the sync mode */
+ switch (mode) {
+ case AMDGPU_SYNC_ALWAYS:
+ return true;
+
+ case AMDGPU_SYNC_NE_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner == owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EQ_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner != owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EXPLICIT:
+ return false;
+ }
+
+ WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+ "Adding eviction fence to sync obj");
+ return true;
}
/**
@@ -201,79 +243,67 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner)
{
- struct dma_resv_list *flist;
+ struct dma_resv_iter cursor;
struct dma_fence *f;
- unsigned i;
- int r = 0;
+ int r;
if (resv == NULL)
return -EINVAL;
-
- /* always sync to the exclusive fence */
- f = dma_resv_get_excl(resv);
- r = amdgpu_sync_fence(sync, f);
-
- flist = dma_resv_get_list(resv);
- if (!flist || r)
- return r;
-
- for (i = 0; i < flist->shared_count; ++i) {
- void *fence_owner;
-
- f = rcu_dereference_protected(flist->shared[i],
- dma_resv_held(resv));
-
- fence_owner = amdgpu_sync_get_owner(f);
-
- /* Always sync to moves, no matter what */
- if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
- r = amdgpu_sync_fence(sync, f);
- if (r)
+ /* Implicitly sync only to KERNEL, WRITE and READ */
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
+ dma_fence_chain_for_each(f, f) {
+ struct dma_fence *tmp = dma_fence_chain_contained(f);
+
+ if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
+ dma_fence_put(f);
+ if (r)
+ return r;
break;
+ }
}
+ }
+ return 0;
+}
- /* We only want to trigger KFD eviction fences on
- * evict or move jobs. Skip KFD fences otherwise.
- */
- if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
- owner != AMDGPU_FENCE_OWNER_UNDEFINED)
- continue;
-
- /* Never sync to VM updates either. */
- if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
- owner != AMDGPU_FENCE_OWNER_UNDEFINED)
- continue;
-
- /* Ignore fences depending on the sync mode */
- switch (mode) {
- case AMDGPU_SYNC_ALWAYS:
- break;
-
- case AMDGPU_SYNC_NE_OWNER:
- if (amdgpu_sync_same_dev(adev, f) &&
- fence_owner == owner)
- continue;
- break;
+/**
+ * amdgpu_sync_kfd - sync to KFD fences
+ *
+ * @sync: sync object to add KFD fences to
+ * @resv: reservation object with KFD fences
+ *
+ * Extract all KFD fences and add them to the sync object.
+ */
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *f;
+ int r = 0;
- case AMDGPU_SYNC_EQ_OWNER:
- if (amdgpu_sync_same_dev(adev, f) &&
- fence_owner != owner)
- continue;
- break;
+ dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, f) {
+ void *fence_owner = amdgpu_sync_get_owner(f);
- case AMDGPU_SYNC_EXPLICIT:
+ if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
continue;
- }
- WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
- "Adding eviction fence to sync obj");
- r = amdgpu_sync_fence(sync, f);
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
if (r)
break;
}
+ dma_resv_iter_end(&cursor);
+
return r;
}
+/* Free the entry back to the slab */
+static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
+{
+ hash_del(&e->node);
+ dma_fence_put(e->fence);
+ kmem_cache_free(amdgpu_sync_slab, e);
+}
+
/**
* amdgpu_sync_peek_fence - get the next fence not signaled yet
*
@@ -295,9 +325,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
if (dma_fence_is_signaled(f)) {
- hash_del(&e->node);
- dma_fence_put(f);
- kmem_cache_free(amdgpu_sync_slab, e);
+ amdgpu_sync_entry_free(e);
continue;
}
if (ring && s_fence) {
@@ -331,6 +359,7 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
struct hlist_node *tmp;
struct dma_fence *f;
int i;
+
hash_for_each_safe(sync->fences, i, tmp, e, node) {
f = e->fence;
@@ -365,19 +394,64 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(clone, f);
+ r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
if (r)
return r;
} else {
- hash_del(&e->node);
- dma_fence_put(f);
- kmem_cache_free(amdgpu_sync_slab, e);
+ amdgpu_sync_entry_free(e);
}
}
- dma_fence_put(clone->last_vm_update);
- clone->last_vm_update = dma_fence_get(source->last_vm_update);
+ return 0;
+}
+/**
+ * amdgpu_sync_move - move all fences from src to dst
+ *
+ * @src: source of the fences, empty after function
+ * @dst: destination for the fences
+ *
+ * Moves all fences from source to destination. All fences in destination are
+ * freed and source is empty after the function call.
+ */
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
+{
+ unsigned int i;
+
+ amdgpu_sync_free(dst);
+
+ for (i = 0; i < HASH_SIZE(src->fences); ++i)
+ hlist_move_list(&src->fences[i], &dst->fences[i]);
+}
+
+/**
+ * amdgpu_sync_push_to_job - push fences into job
+ * @sync: sync object to get the fences from
+ * @job: job to push the fences into
+ *
+ * Add all unsignaled fences from sync to job.
+ */
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i, r;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ f = e->fence;
+ if (dma_fence_is_signaled(f)) {
+ amdgpu_sync_entry_free(e);
+ continue;
+ }
+
+ dma_fence_get(f);
+ r = drm_sched_job_add_dependency(&job->base, f);
+ if (r) {
+ dma_fence_put(f);
+ return r;
+ }
+ }
return 0;
}
@@ -392,9 +466,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
if (r)
return r;
- hash_del(&e->node);
- dma_fence_put(e->fence);
- kmem_cache_free(amdgpu_sync_slab, e);
+ amdgpu_sync_entry_free(e);
}
return 0;
@@ -411,15 +483,10 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
- unsigned i;
-
- hash_for_each_safe(sync->fences, i, tmp, e, node) {
- hash_del(&e->node);
- dma_fence_put(e->fence);
- kmem_cache_free(amdgpu_sync_slab, e);
- }
+ unsigned int i;
- dma_fence_put(sync->last_vm_update);
+ hash_for_each_safe(sync->fences, i, tmp, e, node)
+ amdgpu_sync_entry_free(e);
}
/**
@@ -429,9 +496,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
*/
int amdgpu_sync_init(void)
{
- amdgpu_sync_slab = kmem_cache_create(
- "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN);
if (!amdgpu_sync_slab)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 7c0fe20c470d..51eb4382c91e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -30,6 +30,7 @@ struct dma_fence;
struct dma_resv;
struct amdgpu_device;
struct amdgpu_ring;
+struct amdgpu_job;
enum amdgpu_sync_mode {
AMDGPU_SYNC_ALWAYS,
@@ -43,19 +44,21 @@ enum amdgpu_sync_mode {
*/
struct amdgpu_sync {
DECLARE_HASHTABLE(fences, 4);
- struct dma_fence *last_vm_update;
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
int amdgpu_sync_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
deleted file mode 100644
index 7b230bcbf2c6..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright 2009 VMware, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Michel Dänzer
- */
-
-#include <drm/amdgpu_drm.h>
-#include "amdgpu.h"
-#include "amdgpu_uvd.h"
-#include "amdgpu_vce.h"
-
-/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
-static void amdgpu_do_test_moves(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct amdgpu_bo *vram_obj = NULL;
- struct amdgpu_bo **gtt_obj = NULL;
- struct amdgpu_bo_param bp;
- uint64_t gart_addr, vram_addr;
- unsigned n, size;
- int i, r;
-
- size = 1024 * 1024;
-
- /* Number of tests =
- * (Total GTT - gart_pin_size - (2 transfer windows for buffer moves)) / test size
- */
- n = adev->gmc.gart_size - atomic64_read(&adev->gart_pin_size);
- n -= AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS *
- AMDGPU_GPU_PAGE_SIZE;
- n /= size;
-
- gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL);
- if (!gtt_obj) {
- DRM_ERROR("Failed to allocate %d pointers\n", n);
- r = 1;
- goto out_cleanup;
- }
- memset(&bp, 0, sizeof(bp));
- bp.size = size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = 0;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
-
- r = amdgpu_bo_create(adev, &bp, &vram_obj);
- if (r) {
- DRM_ERROR("Failed to create VRAM object\n");
- goto out_cleanup;
- }
- r = amdgpu_bo_reserve(vram_obj, false);
- if (unlikely(r != 0))
- goto out_unref;
- r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM);
- if (r) {
- DRM_ERROR("Failed to pin VRAM object\n");
- goto out_unres;
- }
- vram_addr = amdgpu_bo_gpu_offset(vram_obj);
- for (i = 0; i < n; i++) {
- void *gtt_map, *vram_map;
- void **gart_start, **gart_end;
- void **vram_start, **vram_end;
- struct dma_fence *fence = NULL;
-
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- r = amdgpu_bo_create(adev, &bp, gtt_obj + i);
- if (r) {
- DRM_ERROR("Failed to create GTT object %d\n", i);
- goto out_lclean;
- }
-
- r = amdgpu_bo_reserve(gtt_obj[i], false);
- if (unlikely(r != 0))
- goto out_lclean_unref;
- r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT);
- if (r) {
- DRM_ERROR("Failed to pin GTT object %d\n", i);
- goto out_lclean_unres;
- }
- r = amdgpu_ttm_alloc_gart(&gtt_obj[i]->tbo);
- if (r) {
- DRM_ERROR("%p bind failed\n", gtt_obj[i]);
- goto out_lclean_unpin;
- }
- gart_addr = amdgpu_bo_gpu_offset(gtt_obj[i]);
-
- r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
- if (r) {
- DRM_ERROR("Failed to map GTT object %d\n", i);
- goto out_lclean_unpin;
- }
-
- for (gart_start = gtt_map, gart_end = gtt_map + size;
- gart_start < gart_end;
- gart_start++)
- *gart_start = gart_start;
-
- amdgpu_bo_kunmap(gtt_obj[i]);
-
- r = amdgpu_copy_buffer(ring, gart_addr, vram_addr,
- size, NULL, &fence, false, false, false);
-
- if (r) {
- DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
- goto out_lclean_unpin;
- }
-
- r = dma_fence_wait(fence, false);
- if (r) {
- DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
- goto out_lclean_unpin;
- }
-
- dma_fence_put(fence);
- fence = NULL;
-
- r = amdgpu_bo_kmap(vram_obj, &vram_map);
- if (r) {
- DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
- goto out_lclean_unpin;
- }
-
- for (gart_start = gtt_map, gart_end = gtt_map + size,
- vram_start = vram_map, vram_end = vram_map + size;
- vram_start < vram_end;
- gart_start++, vram_start++) {
- if (*vram_start != gart_start) {
- DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
- "expected 0x%p (GTT/VRAM offset "
- "0x%16llx/0x%16llx)\n",
- i, *vram_start, gart_start,
- (unsigned long long)
- (gart_addr - adev->gmc.gart_start +
- (void *)gart_start - gtt_map),
- (unsigned long long)
- (vram_addr - adev->gmc.vram_start +
- (void *)gart_start - gtt_map));
- amdgpu_bo_kunmap(vram_obj);
- goto out_lclean_unpin;
- }
- *vram_start = vram_start;
- }
-
- amdgpu_bo_kunmap(vram_obj);
-
- r = amdgpu_copy_buffer(ring, vram_addr, gart_addr,
- size, NULL, &fence, false, false, false);
-
- if (r) {
- DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
- goto out_lclean_unpin;
- }
-
- r = dma_fence_wait(fence, false);
- if (r) {
- DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
- goto out_lclean_unpin;
- }
-
- dma_fence_put(fence);
- fence = NULL;
-
- r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
- if (r) {
- DRM_ERROR("Failed to map GTT object after copy %d\n", i);
- goto out_lclean_unpin;
- }
-
- for (gart_start = gtt_map, gart_end = gtt_map + size,
- vram_start = vram_map, vram_end = vram_map + size;
- gart_start < gart_end;
- gart_start++, vram_start++) {
- if (*gart_start != vram_start) {
- DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
- "expected 0x%p (VRAM/GTT offset "
- "0x%16llx/0x%16llx)\n",
- i, *gart_start, vram_start,
- (unsigned long long)
- (vram_addr - adev->gmc.vram_start +
- (void *)vram_start - vram_map),
- (unsigned long long)
- (gart_addr - adev->gmc.gart_start +
- (void *)vram_start - vram_map));
- amdgpu_bo_kunmap(gtt_obj[i]);
- goto out_lclean_unpin;
- }
- }
-
- amdgpu_bo_kunmap(gtt_obj[i]);
-
- DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
- gart_addr - adev->gmc.gart_start);
- continue;
-
-out_lclean_unpin:
- amdgpu_bo_unpin(gtt_obj[i]);
-out_lclean_unres:
- amdgpu_bo_unreserve(gtt_obj[i]);
-out_lclean_unref:
- amdgpu_bo_unref(&gtt_obj[i]);
-out_lclean:
- for (--i; i >= 0; --i) {
- amdgpu_bo_unpin(gtt_obj[i]);
- amdgpu_bo_unreserve(gtt_obj[i]);
- amdgpu_bo_unref(&gtt_obj[i]);
- }
- if (fence)
- dma_fence_put(fence);
- break;
- }
-
- amdgpu_bo_unpin(vram_obj);
-out_unres:
- amdgpu_bo_unreserve(vram_obj);
-out_unref:
- amdgpu_bo_unref(&vram_obj);
-out_cleanup:
- kfree(gtt_obj);
- if (r) {
- pr_warn("Error while testing BO move\n");
- }
-}
-
-void amdgpu_test_moves(struct amdgpu_device *adev)
-{
- if (adev->mman.buffer_funcs)
- amdgpu_do_test_moves(adev);
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 792d20261846..d13e64a69e25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -127,8 +127,8 @@ TRACE_EVENT(amdgpu_bo_create,
TP_fast_assign(
__entry->bo = bo;
- __entry->pages = bo->tbo.mem.num_pages;
- __entry->type = bo->tbo.mem.mem_type;
+ __entry->pages = PFN_UP(bo->tbo.resource->size);
+ __entry->type = bo->tbo.resource->mem_type;
__entry->prefer = bo->preferred_domains;
__entry->allow = bo->allowed_domains;
__entry->visible = bo->flags;
@@ -140,8 +140,10 @@ TRACE_EVENT(amdgpu_bo_create,
);
TRACE_EVENT(amdgpu_cs,
- TP_PROTO(struct amdgpu_cs_parser *p, int i),
- TP_ARGS(p, i),
+ TP_PROTO(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib),
+ TP_ARGS(p, job, ib),
TP_STRUCT__entry(
__field(struct amdgpu_bo_list *, bo_list)
__field(u32, ring)
@@ -151,10 +153,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
- __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
- __entry->dw = p->job->ibs[i].length_dw;
+ __entry->ring = to_amdgpu_ring(job->base.entity->rq->sched)->idx;
+ __entry->dw = ib->length_dw;
__entry->fences = amdgpu_fence_count_emitted(
- to_amdgpu_ring(p->entity->rq->sched));
+ to_amdgpu_ring(job->base.entity->rq->sched));
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
@@ -165,25 +167,23 @@ TRACE_EVENT(amdgpu_cs_ioctl,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
- __field(uint64_t, sched_job_id)
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
- __field(unsigned int, context)
- __field(unsigned int, seqno)
+ __field(u64, context)
+ __field(u64, seqno)
__field(struct dma_fence *, fence)
__string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
TP_fast_assign(
- __entry->sched_job_id = job->base.id;
- __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __assign_str(timeline);
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
+ __assign_str(ring);
__entry->num_ibs = job->num_ibs;
),
- TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
- __entry->sched_job_id, __get_str(timeline), __entry->context,
+ TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u",
+ __get_str(timeline), __entry->context,
__entry->seqno, __get_str(ring), __entry->num_ibs)
);
@@ -191,24 +191,22 @@ TRACE_EVENT(amdgpu_sched_run_job,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
- __field(uint64_t, sched_job_id)
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
- __field(unsigned int, context)
- __field(unsigned int, seqno)
+ __field(u64, context)
+ __field(u64, seqno)
__string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
TP_fast_assign(
- __entry->sched_job_id = job->base.id;
- __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __assign_str(timeline);
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
+ __assign_str(ring);
__entry->num_ibs = job->num_ibs;
),
- TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
- __entry->sched_job_id, __get_str(timeline), __entry->context,
+ TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u",
+ __get_str(timeline), __entry->context,
__entry->seqno, __get_str(ring), __entry->num_ibs)
);
@@ -229,9 +227,9 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_fast_assign(
__entry->pasid = vm->pasid;
- __assign_str(ring, ring->name)
+ __assign_str(ring);
__entry->vmid = job->vmid;
- __entry->vm_hub = ring->funcs->vmhub,
+ __entry->vm_hub = ring->vm_hub,
__entry->pd_addr = job->vm_pd_addr;
__entry->needs_flush = job->vm_needs_flush;
),
@@ -358,11 +356,10 @@ TRACE_EVENT(amdgpu_vm_update_ptes,
}
),
TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx,"
- " flags:0x%llx, incr:%llu, dst:\n%s%s", __entry->pid,
+ " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid,
__entry->vm_ctx, __entry->start, __entry->end,
__entry->flags, __entry->incr, __print_array(
- __get_dynamic_array(dst), min(__entry->nptes, 32u), 8),
- __entry->nptes > 32 ? "..." : "")
+ __get_dynamic_array(dst), __entry->nptes, 8))
);
TRACE_EVENT(amdgpu_vm_set_ptes,
@@ -424,14 +421,14 @@ TRACE_EVENT(amdgpu_vm_flush,
),
TP_fast_assign(
- __assign_str(ring, ring->name)
+ __assign_str(ring);
__entry->vmid = vmid;
- __entry->vm_hub = ring->funcs->vmhub;
+ __entry->vm_hub = ring->vm_hub;
__entry->pd_addr = pd_addr;
),
TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
__get_str(ring), __entry->vmid,
- __entry->vm_hub,__entry->pd_addr)
+ __entry->vm_hub, __entry->pd_addr)
);
DECLARE_EVENT_CLASS(amdgpu_pasid,
@@ -456,6 +453,38 @@ DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
TP_ARGS(pasid)
);
+TRACE_EVENT(amdgpu_isolation,
+ TP_PROTO(void *prev, void *next),
+ TP_ARGS(prev, next),
+ TP_STRUCT__entry(
+ __field(void *, prev)
+ __field(void *, next)
+ ),
+
+ TP_fast_assign(
+ __entry->prev = prev;
+ __entry->next = next;
+ ),
+ TP_printk("prev=%p, next=%p",
+ __entry->prev,
+ __entry->next)
+);
+
+TRACE_EVENT(amdgpu_cleaner_shader,
+ TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence),
+ TP_ARGS(ring, fence),
+ TP_STRUCT__entry(
+ __string(ring, ring->name)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ring);
+ __entry->seqno = fence->seqno;
+ ),
+ TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno)
+);
+
TRACE_EVENT(amdgpu_bo_list_set,
TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
TP_ARGS(list, bo),
@@ -493,7 +522,7 @@ TRACE_EVENT(amdgpu_cs_bo_status,
);
TRACE_EVENT(amdgpu_bo_move,
- TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
+ TP_PROTO(struct amdgpu_bo *bo, uint32_t new_placement, uint32_t old_placement),
TP_ARGS(bo, new_placement, old_placement),
TP_STRUCT__entry(
__field(struct amdgpu_bo *, bo)
@@ -518,23 +547,35 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
__string(ring, sched_job->base.sched->name)
- __field(uint64_t, id)
__field(struct dma_fence *, fence)
- __field(uint64_t, ctx)
- __field(unsigned, seqno)
+ __field(u64, ctx)
+ __field(u64, seqno)
),
TP_fast_assign(
- __assign_str(ring, sched_job->base.sched->name)
- __entry->id = sched_job->base.id;
+ __assign_str(ring);
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
- TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
- __get_str(ring), __entry->id,
- __entry->fence, __entry->ctx,
- __entry->seqno)
+ TP_printk("job ring=%s need pipe sync to fence=%llu:%llu",
+ __get_str(ring), __entry->ctx, __entry->seqno)
+);
+
+TRACE_EVENT(amdgpu_reset_reg_dumps,
+ TP_PROTO(uint32_t address, uint32_t value),
+ TP_ARGS(address, value),
+ TP_STRUCT__entry(
+ __field(uint32_t, address)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->value = value;
+ ),
+ TP_printk("amdgpu register dump 0x%x: 0x%x",
+ __entry->address,
+ __entry->value)
);
#undef AMDGPU_JOB_GET_TIMELINE_NAME
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
index 57c6c39ba064..b96d885f6e33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -23,6 +23,7 @@
*/
#include <drm/amdgpu_drm.h>
+#include "amdgpu_cs.h"
#include "amdgpu.h"
#define CREATE_TRACE_POINTS
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 9fd2157b133a..2b931e855abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -32,22 +32,22 @@
#include <linux/dma-mapping.h>
#include <linux/iommu.h>
-#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
-#include <linux/swiotlb.h>
#include <linux/dma-buf.h>
#include <linux/sizes.h>
+#include <linux/module.h>
-#include <drm/ttm/ttm_bo_api.h>
-#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_tt.h>
-#include <drm/drm_debugfs.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
@@ -56,15 +56,19 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
#include "amdgpu_atomfirmware.h"
+#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"
-#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
+MODULE_IMPORT_NS("DMA_BUF");
-static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
+#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
+
+static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_tt *ttm,
struct ttm_resource *bo_mem);
-static void amdgpu_ttm_backend_unbind(struct ttm_bo_device *bdev,
+static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
struct ttm_tt *ttm);
static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
@@ -98,35 +102,39 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
/* Don't handle scatter gather BOs */
if (bo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
/* Object isn't an AMDGPU object so ignore */
if (!amdgpu_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
- placement->busy_placement = &placements;
placement->num_placement = 1;
- placement->num_busy_placement = 1;
return;
}
abo = ttm_to_amdgpu_bo(bo);
- switch (bo->mem.mem_type) {
+ if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
+ placement->num_placement = 0;
+ return;
+ }
+
+ switch (bo->resource->mem_type) {
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
- amdgpu_bo_in_cpu_visible_vram(abo)) {
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -134,17 +142,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* BOs to be evicted from VRAM
*/
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT);
+ AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
- abo->placement.busy_placement = &abo->placements[1];
- abo->placement.num_busy_placement = 1;
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
} else {
/* Move to GTT memory */
- amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
}
break;
case TTM_PL_TT:
+ case AMDGPU_PL_PREEMPT:
default:
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
break;
@@ -153,83 +163,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
}
/**
- * amdgpu_verify_access - Verify access for a mmap call
- *
- * @bo: The buffer object to map
- * @filp: The file pointer from the process performing the mmap
- *
- * This is called by ttm_bo_mmap() to verify whether a process
- * has the right to mmap a BO to their process space.
- */
-static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
-{
- struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
-
- /*
- * Don't verify access for KFD BOs. They don't have a GEM
- * object associated with them.
- */
- if (abo->kfd_bo)
- return 0;
-
- if (amdgpu_ttm_tt_get_usermm(bo->ttm))
- return -EPERM;
- return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
- filp->private_data);
-}
-
-/**
- * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT buffer.
- *
- * @bo: The bo to assign the memory to.
- * @mm_node: Memory manager node for drm allocator.
- * @mem: The region where the bo resides.
- *
- */
-static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
- struct drm_mm_node *mm_node,
- struct ttm_resource *mem)
-{
- uint64_t addr = 0;
-
- if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
- addr = mm_node->start << PAGE_SHIFT;
- addr += amdgpu_ttm_domain_start(amdgpu_ttm_adev(bo->bdev),
- mem->mem_type);
- }
- return addr;
-}
-
-/**
- * amdgpu_find_mm_node - Helper function finds the drm_mm_node corresponding to
- * @offset. It also modifies the offset to be within the drm_mm_node returned
- *
- * @mem: The region where the bo resides.
- * @offset: The offset that drm_mm_node is used for finding.
- *
- */
-static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
- uint64_t *offset)
-{
- struct drm_mm_node *mm_node = mem->mm_node;
-
- while (*offset >= (mm_node->size << PAGE_SHIFT)) {
- *offset -= (mm_node->size << PAGE_SHIFT);
- ++mm_node;
- }
- return mm_node;
-}
-
-/**
* amdgpu_ttm_map_buffer - Map memory into the GART windows
* @bo: buffer object to map
* @mem: memory object to map
- * @mm_node: drm_mm node object to map
- * @num_pages: number of pages to map
- * @offset: offset into @mm_node where to start
+ * @mm_cur: range to map
* @window: which GART window to use
* @ring: DMA ring to use for the copy
* @tmz: if we should setup a TMZ enabled mapping
+ * @size: in number of bytes to map, out number of bytes mapped
* @addr: resulting address inside the MC address space
*
* Setup one of the GART windows to access a specific piece of memory or return
@@ -237,40 +178,56 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
*/
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
struct ttm_resource *mem,
- struct drm_mm_node *mm_node,
- unsigned num_pages, uint64_t offset,
- unsigned window, struct amdgpu_ring *ring,
- bool tmz, uint64_t *addr)
+ struct amdgpu_res_cursor *mm_cur,
+ unsigned int window, struct amdgpu_ring *ring,
+ bool tmz, uint64_t *size, uint64_t *addr)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_job *job;
- unsigned num_dw, num_bytes;
- struct dma_fence *fence;
+ unsigned int offset, num_pages, num_dw, num_bytes;
uint64_t src_addr, dst_addr;
+ struct amdgpu_job *job;
void *cpu_addr;
uint64_t flags;
- unsigned int i;
int r;
BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+ if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
+ return -EINVAL;
+
/* Map only what can't be accessed directly */
if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
- *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset;
+ *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
+ mm_cur->start;
return 0;
}
+
+ /*
+ * If start begins at an offset inside the page, then adjust the size
+ * and addr accordingly
+ */
+ offset = mm_cur->start & ~PAGE_MASK;
+
+ num_pages = PFN_UP(*size + offset);
+ num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
+
+ *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
+
*addr = adev->gmc.gart_start;
*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
AMDGPU_GPU_PAGE_SIZE;
- *addr += offset & ~PAGE_MASK;
+ *addr += offset;
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
- num_bytes = num_pages * 8;
+ num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
- AMDGPU_IB_POOL_DELAYED, &job);
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER);
if (r)
return r;
@@ -280,7 +237,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -292,45 +249,22 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
cpu_addr = &job->ibs[0].ptr[num_dw];
if (mem->mem_type == TTM_PL_TT) {
- dma_addr_t *dma_address;
+ dma_addr_t *dma_addr;
- dma_address = &bo->ttm->dma_address[offset >> PAGE_SHIFT];
- r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
- cpu_addr);
- if (r)
- goto error_free;
+ dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
+ amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
} else {
- dma_addr_t dma_address;
+ u64 pa = mm_cur->start + adev->vm_manager.vram_base_offset;
- dma_address = (mm_node->start << PAGE_SHIFT) + offset;
- dma_address += adev->vm_manager.vram_base_offset;
-
- for (i = 0; i < num_pages; ++i) {
- r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
- &dma_address, flags, cpu_addr);
- if (r)
- goto error_free;
-
- dma_address += PAGE_SIZE;
- }
+ amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
}
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
- if (r)
- goto error_free;
-
- dma_fence_put(fence);
-
- return r;
-
-error_free:
- amdgpu_job_free(job);
- return r;
+ dma_fence_put(amdgpu_job_submit(job));
+ return 0;
}
/**
- * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
* @adev: amdgpu device
* @src: buffer/address where to read from
* @dst: buffer/address where to write to
@@ -344,109 +278,87 @@ error_free:
* move and different for a BO to BO copy.
*
*/
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f)
+__attribute__((nonnull))
+static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
+ struct dma_fence **f)
{
- const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
- AMDGPU_GPU_PAGE_SIZE);
-
- uint64_t src_node_size, dst_node_size, src_offset, dst_offset;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct drm_mm_node *src_mm, *dst_mm;
+ struct amdgpu_res_cursor src_mm, dst_mm;
struct dma_fence *fence = NULL;
int r = 0;
+ uint32_t copy_flags = 0;
+ struct amdgpu_bo *abo_src, *abo_dst;
if (!adev->mman.buffer_funcs_enabled) {
- DRM_ERROR("Trying to move memory with ring turned off.\n");
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
return -EINVAL;
}
- src_offset = src->offset;
- if (src->mem->mm_node) {
- src_mm = amdgpu_find_mm_node(src->mem, &src_offset);
- src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset;
- } else {
- src_mm = NULL;
- src_node_size = ULLONG_MAX;
- }
-
- dst_offset = dst->offset;
- if (dst->mem->mm_node) {
- dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset);
- dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset;
- } else {
- dst_mm = NULL;
- dst_node_size = ULLONG_MAX;
- }
+ amdgpu_res_first(src->mem, src->offset, size, &src_mm);
+ amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
mutex_lock(&adev->mman.gtt_window_lock);
-
- while (size) {
- uint32_t src_page_offset = src_offset & ~PAGE_MASK;
- uint32_t dst_page_offset = dst_offset & ~PAGE_MASK;
+ while (src_mm.remaining) {
+ uint64_t from, to, cur_size, tiling_flags;
+ uint32_t num_type, data_format, max_com, write_compress_disable;
struct dma_fence *next;
- uint32_t cur_size;
- uint64_t from, to;
- /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
- * begins at an offset, then adjust the size accordingly
- */
- cur_size = max(src_page_offset, dst_page_offset);
- cur_size = min(min3(src_node_size, dst_node_size, size),
- (uint64_t)(GTT_MAX_BYTES - cur_size));
+ /* Never copy more than 256MiB at once to avoid a timeout */
+ cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
/* Map src to window 0 and dst to window 1. */
- r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm,
- PFN_UP(cur_size + src_page_offset),
- src_offset, 0, ring, tmz, &from);
+ r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
+ 0, ring, tmz, &cur_size, &from);
if (r)
goto error;
- r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm,
- PFN_UP(cur_size + dst_page_offset),
- dst_offset, 1, ring, tmz, &to);
+ r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
+ 1, ring, tmz, &cur_size, &to);
if (r)
goto error;
- r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true, tmz);
+ abo_src = ttm_to_amdgpu_bo(src->bo);
+ abo_dst = ttm_to_amdgpu_bo(dst->bo);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+ if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
+ copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED;
+ if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (dst->mem->mem_type == TTM_PL_VRAM)) {
+ copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED;
+ amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags);
+ max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+ num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
+ data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+ write_compress_disable =
+ AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
+ copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
+ AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
+ AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) |
+ AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE,
+ write_compress_disable));
+ }
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
if (r)
goto error;
dma_fence_put(fence);
fence = next;
- size -= cur_size;
- if (!size)
- break;
-
- src_node_size -= cur_size;
- if (!src_node_size) {
- ++src_mm;
- src_node_size = src_mm->size << PAGE_SHIFT;
- src_offset = 0;
- } else {
- src_offset += cur_size;
- }
-
- dst_node_size -= cur_size;
- if (!dst_node_size) {
- ++dst_mm;
- dst_node_size = dst_mm->size << PAGE_SHIFT;
- dst_offset = 0;
- } else {
- dst_offset += cur_size;
- }
+ amdgpu_res_next(&src_mm, cur_size);
+ amdgpu_res_next(&dst_mm, cur_size);
}
error:
mutex_unlock(&adev->mman.gtt_window_lock);
- if (f)
- *f = dma_fence_get(fence);
- dma_fence_put(fence);
+ *f = fence;
return r;
}
@@ -475,7 +387,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
dst.offset = 0;
r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
- new_mem->num_pages << PAGE_SHIFT,
+ new_mem->size,
amdgpu_bo_encrypted(abo),
bo->base.resv, &fence);
if (r)
@@ -486,11 +398,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
- NULL, &wipe_fence);
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
dma_fence_put(fence);
fence = wipe_fence;
}
@@ -511,28 +424,56 @@ error:
return r;
}
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
*
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
*/
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
{
- struct drm_mm_node *nodes = mem->mm_node;
+ struct amdgpu_res_cursor cursor;
+
+ if (!res)
+ return false;
- if (mem->mem_type == TTM_PL_SYSTEM ||
- mem->mem_type == TTM_PL_TT)
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL ||
+ res->mem_type == AMDGPU_PL_MMIO_REMAP)
return true;
- if (mem->mem_type != TTM_PL_VRAM)
+
+ if (res->mem_type != TTM_PL_VRAM)
+ return false;
+
+ amdgpu_res_first(res, 0, res->size, &cursor);
+ while (cursor.remaining) {
+ if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
+ return false;
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+
+ return true;
+}
+
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
return false;
/* ttm_resource_ioremap only supports contiguous memory */
- if (nodes->size != mem->num_pages)
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
return false;
- return ((nodes->start + nodes->size) << PAGE_SHIFT)
- <= adev->gmc.visible_vram_size;
+ return true;
}
/*
@@ -547,75 +488,93 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
{
struct amdgpu_device *adev;
struct amdgpu_bo *abo;
- struct ttm_resource *old_mem = &bo->mem;
+ struct ttm_resource *old_mem = bo->resource;
int r;
- if (new_mem->mem_type == TTM_PL_TT) {
+ if (new_mem->mem_type == TTM_PL_TT ||
+ new_mem->mem_type == AMDGPU_PL_PREEMPT) {
r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
if (r)
return r;
}
- /* Can't move a pinned BO */
abo = ttm_to_amdgpu_bo(bo);
- if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
- return -EINVAL;
-
adev = amdgpu_ttm_adev(bo->bdev);
- if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
+ if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
+ bo->ttm == NULL)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_TT) {
+ (new_mem->mem_type == TTM_PL_TT ||
+ new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
- if (old_mem->mem_type == TTM_PL_TT &&
+ if ((old_mem->mem_type == TTM_PL_TT ||
+ old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
new_mem->mem_type == TTM_PL_SYSTEM) {
r = ttm_bo_wait_ctx(bo, ctx);
if (r)
return r;
amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
- ttm_resource_free(bo, &bo->mem);
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ ttm_resource_free(bo, &bo->resource);
ttm_bo_assign_mem(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
+ old_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ old_mem->mem_type == AMDGPU_PL_MMIO_REMAP ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
- new_mem->mem_type == AMDGPU_PL_OA) {
+ new_mem->mem_type == AMDGPU_PL_OA ||
+ new_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) {
/* Nothing to save here */
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
- if (adev->mman.buffer_funcs_enabled) {
- if (((old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_VRAM) ||
- (old_mem->mem_type == TTM_PL_VRAM &&
- new_mem->mem_type == TTM_PL_SYSTEM))) {
- hop->fpfn = 0;
- hop->lpfn = 0;
- hop->mem_type = TTM_PL_TT;
- hop->flags = 0;
- return -EMULTIHOP;
- }
+ if (bo->type == ttm_bo_type_device &&
+ new_mem->mem_type == TTM_PL_VRAM &&
+ old_mem->mem_type != TTM_PL_VRAM) {
+ /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
+ * accesses the BO after it's moved.
+ */
+ abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ }
+ if (adev->mman.buffer_funcs_enabled &&
+ ((old_mem->mem_type == TTM_PL_SYSTEM &&
+ new_mem->mem_type == TTM_PL_VRAM) ||
+ (old_mem->mem_type == TTM_PL_VRAM &&
+ new_mem->mem_type == TTM_PL_SYSTEM))) {
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = TTM_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ return -EMULTIHOP;
+ }
+
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ if (adev->mman.buffer_funcs_enabled)
r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
- } else {
+ else
r = -ENODEV;
- }
if (r) {
/* Check that all memory is CPU accessible */
- if (!amdgpu_mem_visible(adev, old_mem) ||
- !amdgpu_mem_visible(adev, new_mem)) {
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
@@ -625,19 +584,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
}
- if (bo->type == ttm_bo_type_device &&
- new_mem->mem_type == TTM_PL_VRAM &&
- old_mem->mem_type != TTM_PL_VRAM) {
- /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
- * accesses the BO after it's moved.
- */
- abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- }
-
-out:
- /* update statistics */
+ /* update statistics after the move */
+ if (evict)
+ atomic64_inc(&adev->num_evictions);
atomic64_add(bo->base.size, &adev->num_bytes_moved);
- amdgpu_bo_move_notify(bo, evict, new_mem);
return 0;
}
@@ -646,35 +596,40 @@ out:
*
* Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
*/
-static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_resource *mem)
+static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
+ struct ttm_resource *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct drm_mm_node *mm_node = mem->mm_node;
- size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
switch (mem->mem_type) {
case TTM_PL_SYSTEM:
/* system memory */
return 0;
case TTM_PL_TT:
+ case AMDGPU_PL_PREEMPT:
break;
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
- /* check if it's visible */
- if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
- return -EINVAL;
- /* Only physically contiguous buffers apply. In a contiguous
- * buffer, size of the first mm_node would match the number of
- * pages in ttm_resource.
- */
+
if (adev->mman.aper_base_kaddr &&
- (mm_node->size == mem->num_pages))
+ mem->placement & TTM_PL_FLAG_CONTIGUOUS)
mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
mem->bus.offset;
mem->bus.offset += adev->gmc.aper_base;
mem->bus.is_iomem = true;
- mem->bus.caching = ttm_write_combined;
+ break;
+ case AMDGPU_PL_DOORBELL:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->doorbell.base;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
+ case AMDGPU_PL_MMIO_REMAP:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->rmmio_remap.bus_addr;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
break;
default:
return -EINVAL;
@@ -686,12 +641,17 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
- uint64_t offset = (page_offset << PAGE_SHIFT);
- struct drm_mm_node *mm;
+ struct amdgpu_res_cursor cursor;
+
+ amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
+ &cursor);
- mm = amdgpu_find_mm_node(&bo->mem, &offset);
- offset += adev->gmc.aper_base;
- return mm->start + (offset >> PAGE_SHIFT);
+ if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
+ return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+ else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP)
+ return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT;
+
+ return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
}
/**
@@ -726,29 +686,29 @@ struct amdgpu_ttm_tt {
struct task_struct *usertask;
uint32_t userflags;
bool bound;
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- struct hmm_range *range;
-#endif
+ int32_t pool_id;
};
+#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
+
#ifdef CONFIG_DRM_AMDGPU_USERPTR
/*
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking CPU page table update
*
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
- * once afterwards to stop HMM tracking
+ * once afterwards to stop HMM tracking. Its the caller responsibility to ensure
+ * that range is a valid memory and it is freed too.
*/
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct amdgpu_hmm_range *range)
{
struct ttm_tt *ttm = bo->tbo.ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
- struct hmm_range *range;
- unsigned long timeout;
struct mm_struct *mm;
- unsigned long i;
+ bool readonly;
int r = 0;
mm = bo->notifier.mm;
@@ -757,35 +717,12 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
return -EFAULT;
}
- /* Another get_user_pages is running at the same time?? */
- if (WARN_ON(gtt->range))
- return -EFAULT;
-
if (!mmget_not_zero(mm)) /* Happens during process shutdown */
return -ESRCH;
- range = kzalloc(sizeof(*range), GFP_KERNEL);
- if (unlikely(!range)) {
- r = -ENOMEM;
- goto out;
- }
- range->notifier = &bo->notifier;
- range->start = bo->notifier.interval_tree.start;
- range->end = bo->notifier.interval_tree.last + 1;
- range->default_flags = HMM_PFN_REQ_FAULT;
- if (!amdgpu_ttm_tt_is_readonly(ttm))
- range->default_flags |= HMM_PFN_REQ_WRITE;
-
- range->hmm_pfns = kvmalloc_array(ttm->num_pages,
- sizeof(*range->hmm_pfns), GFP_KERNEL);
- if (unlikely(!range->hmm_pfns)) {
- r = -ENOMEM;
- goto out_free_ranges;
- }
-
mmap_read_lock(mm);
- vma = find_vma(mm, start);
- if (unlikely(!vma || start < vma->vm_start)) {
+ vma = vma_lookup(mm, start);
+ if (unlikely(!vma)) {
r = -EFAULT;
goto out_unlock;
}
@@ -794,83 +731,20 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
r = -EPERM;
goto out_unlock;
}
- mmap_read_unlock(mm);
- timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
-retry:
- range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
-
- mmap_read_lock(mm);
- r = hmm_range_fault(range);
+ readonly = amdgpu_ttm_tt_is_readonly(ttm);
+ r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
+ readonly, NULL, range);
+out_unlock:
mmap_read_unlock(mm);
- if (unlikely(r)) {
- /*
- * FIXME: This timeout should encompass the retry from
- * mmu_interval_read_retry() as well.
- */
- if (r == -EBUSY && !time_after(jiffies, timeout))
- goto retry;
- goto out_free_pfns;
- }
-
- /*
- * Due to default_flags, all pages are HMM_PFN_VALID or
- * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
- * the notifier_lock, and mmu_interval_read_retry() must be done first.
- */
- for (i = 0; i < ttm->num_pages; i++)
- pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
+ if (r)
+ pr_debug("failed %d to get user pages 0x%lx\n", r, start);
- gtt->range = range;
mmput(mm);
- return 0;
-
-out_unlock:
- mmap_read_unlock(mm);
-out_free_pfns:
- kvfree(range->hmm_pfns);
-out_free_ranges:
- kfree(range);
-out:
- mmput(mm);
return r;
}
-/*
- * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
- * Check if the pages backing this ttm range have been invalidated
- *
- * Returns: true if pages are still valid
- */
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool r = false;
-
- if (!gtt || !gtt->userptr)
- return false;
-
- DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
- gtt->userptr, ttm->num_pages);
-
- WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
- "No user pages to check\n");
-
- if (gtt->range) {
- /*
- * FIXME: Must always hold notifier_lock for this, and must
- * not ignore the return code.
- */
- r = mmu_interval_read_retry(gtt->range->notifier,
- gtt->range->notifier_seq);
- kvfree(gtt->range->hmm_pfns);
- kfree(gtt->range);
- gtt->range = NULL;
- }
-
- return !r;
-}
#endif
/*
@@ -880,12 +754,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range)
{
unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
- ttm->pages[i] = pages ? pages[i] : NULL;
+ ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_range.hmm_pfns[i]) : NULL;
}
/*
@@ -893,20 +767,19 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
*
* Called by amdgpu_ttm_backend_bind()
**/
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_bo_device *bdev,
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int r;
-
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ int r;
/* Allocate an SG array and squash pages into it */
r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
- ttm->num_pages << PAGE_SHIFT,
+ (u64)ttm->num_pages << PAGE_SHIFT,
GFP_KERNEL);
if (r)
goto release_sg;
@@ -914,7 +787,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_bo_device *bdev,
/* Map SG to device */
r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
if (r)
- goto release_sg;
+ goto release_sg_table;
/* convert SG to linear array of pages and dma addresses */
drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
@@ -922,6 +795,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_bo_device *bdev,
return 0;
+release_sg_table:
+ sg_free_table(ttm->sg);
release_sg:
kfree(ttm->sg);
ttm->sg = NULL;
@@ -931,82 +806,79 @@ release_sg:
/*
* amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
*/
-static void amdgpu_ttm_tt_unpin_userptr(struct ttm_bo_device *bdev,
+static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
/* double check that we don't free the table twice */
- if (!ttm->sg->sgl)
+ if (!ttm->sg || !ttm->sg->sgl)
return;
/* unmap the pages mapped to the device */
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
+}
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- if (gtt->range) {
- unsigned long i;
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
+ * and uses memory type default, UC. The rest of pages_per_xcc are
+ * Ctrl stack and modify their memory type to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+ struct ttm_tt *ttm, uint64_t flags)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ uint64_t total_pages = ttm->num_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ int i;
+ uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
- for (i = 0; i < ttm->num_pages; i++) {
- if (ttm->pages[i] !=
- hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
- break;
- }
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
- WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ /* MQD page: use default flags */
+ amdgpu_gart_bind(adev,
+ gtt->offset + (page_idx << PAGE_SHIFT),
+ 1, &gtt->ttm.dma_address[page_idx], flags);
+ /*
+ * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+ * the second page of the BO onward.
+ */
+ amdgpu_gart_bind(adev,
+ gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+ pages_per_xcc - 1,
+ &gtt->ttm.dma_address[page_idx + 1],
+ ctrl_flags);
}
-#endif
}
-static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
- struct ttm_buffer_object *tbo,
- uint64_t flags)
+static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ uint64_t flags)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
struct ttm_tt *ttm = tbo->ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int r;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (amdgpu_bo_encrypted(abo))
flags |= AMDGPU_PTE_TMZ;
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
- uint64_t page_idx = 1;
-
- r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
- ttm->pages, gtt->ttm.dma_address, flags);
- if (r)
- goto gart_bind_fail;
-
- /* The memory type of the first page defaults to UC. Now
- * modify the memory type to NC from the second page of
- * the BO onward.
- */
- flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
-
- r = amdgpu_gart_bind(adev,
- gtt->offset + (page_idx << PAGE_SHIFT),
- ttm->num_pages - page_idx,
- &ttm->pages[page_idx],
- &(gtt->ttm.dma_address[page_idx]), flags);
+ amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
} else {
- r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
- ttm->pages, gtt->ttm.dma_address, flags);
+ amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ gtt->ttm.dma_address, flags);
}
-
-gart_bind_fail:
- if (r)
- DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
- ttm->num_pages, gtt->offset);
-
- return r;
+ gtt->bound = true;
}
/*
@@ -1015,14 +887,14 @@ gart_bind_fail:
* Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
* This handles binding GTT memory to the device address space.
*/
-static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
+static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_tt *ttm,
struct ttm_resource *bo_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void*)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
uint64_t flags;
- int r = 0;
+ int r;
if (!bo_mem)
return -EINVAL;
@@ -1033,21 +905,33 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
if (gtt->userptr) {
r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
if (r) {
- DRM_ERROR("failed to pin userptr\n");
+ dev_err(adev->dev, "failed to pin userptr\n");
return r;
}
+ } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
+ if (!ttm->sg) {
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+
+ attach = gtt->gobj->import_attach;
+ sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ ttm->sg = sgt;
+ }
+
+ drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
+ ttm->num_pages);
}
+
if (!ttm->num_pages) {
WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
ttm->num_pages, bo_mem, ttm);
}
- if (bo_mem->mem_type == AMDGPU_PL_GDS ||
- bo_mem->mem_type == AMDGPU_PL_GWS ||
- bo_mem->mem_type == AMDGPU_PL_OA)
- return -EINVAL;
-
- if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
+ if (bo_mem->mem_type != TTM_PL_TT ||
+ !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
gtt->offset = AMDGPU_BO_INVALID_OFFSET;
return 0;
}
@@ -1057,14 +941,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
/* bind pages into GART page tables */
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
- r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
- ttm->pages, gtt->ttm.dma_address, flags);
-
- if (r)
- DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
- ttm->num_pages, gtt->offset);
+ amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ gtt->ttm.dma_address, flags);
gtt->bound = true;
- return r;
+ return 0;
}
/*
@@ -1079,51 +959,41 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
- struct ttm_resource tmp;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
struct ttm_placement placement;
struct ttm_place placements;
+ struct ttm_resource *tmp;
uint64_t addr, flags;
int r;
- if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
+ if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
return 0;
addr = amdgpu_gmc_agp_addr(bo);
- if (addr != AMDGPU_BO_INVALID_OFFSET) {
- bo->mem.start = addr >> PAGE_SHIFT;
- } else {
+ if (addr != AMDGPU_BO_INVALID_OFFSET)
+ return 0;
- /* allocate GART space */
- tmp = bo->mem;
- tmp.mm_node = NULL;
- placement.num_placement = 1;
- placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
- placements.fpfn = 0;
- placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
- placements.mem_type = TTM_PL_TT;
- placements.flags = bo->mem.placement;
-
- r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
- if (unlikely(r))
- return r;
+ /* allocate GART space */
+ placement.num_placement = 1;
+ placement.placement = &placements;
+ placements.fpfn = 0;
+ placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
+ placements.mem_type = TTM_PL_TT;
+ placements.flags = bo->resource->placement;
- /* compute PTE flags for this buffer object */
- flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+ r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
+ if (unlikely(r))
+ return r;
- /* Bind pages */
- gtt->offset = (u64)tmp.start << PAGE_SHIFT;
- r = amdgpu_ttm_gart_bind(adev, bo, flags);
- if (unlikely(r)) {
- ttm_resource_free(bo, &tmp);
- return r;
- }
+ /* compute PTE flags for this buffer object */
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
- ttm_resource_free(bo, &bo->mem);
- bo->mem = tmp;
- }
+ /* Bind pages */
+ gtt->offset = (u64)tmp->start << PAGE_SHIFT;
+ amdgpu_ttm_gart_bind(adev, bo, flags);
+ amdgpu_gart_invalidate_tlb(adev);
+ ttm_resource_free(bo, &bo->resource);
+ ttm_bo_assign_mem(bo, tmp);
return 0;
}
@@ -1134,19 +1004,16 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
* Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
* rebind GTT pages during a GPU reset.
*/
-int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
uint64_t flags;
- int r;
if (!tbo->ttm)
- return 0;
-
- flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
- r = amdgpu_ttm_gart_bind(adev, tbo, flags);
+ return;
- return r;
+ flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
+ amdgpu_ttm_gart_bind(adev, tbo, flags);
}
/*
@@ -1155,38 +1022,39 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
* Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
* ttm_tt_destroy().
*/
-static void amdgpu_ttm_backend_unbind(struct ttm_bo_device *bdev,
+static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int r;
-
- if (!gtt->bound)
- return;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
/* if the pages have userptr pinning then clear that first */
- if (gtt->userptr)
+ if (gtt->userptr) {
amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
+ } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) {
+ struct dma_buf_attachment *attach;
+
+ attach = gtt->gobj->import_attach;
+ dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+ ttm->sg = NULL;
+ }
+
+ if (!gtt->bound)
+ return;
if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
return;
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
- r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
- if (r)
- DRM_ERROR("failed to unbind %u pages at 0x%08llX\n",
- gtt->ttm.num_pages, gtt->offset);
+ amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
gtt->bound = false;
}
-static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,
+static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
- amdgpu_ttm_backend_unbind(bdev, ttm);
- ttm_tt_destroy_common(bdev, ttm);
if (gtt->usertask)
put_task_struct(gtt->usertask);
@@ -1205,15 +1073,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_ttm_tt *gtt;
enum ttm_caching caching;
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
- if (gtt == NULL) {
+ if (!gtt)
return NULL;
- }
+
gtt->gobj = &bo->base;
+ if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
+ gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+ else
+ gtt->pool_id = abo->xcp_id;
if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
caching = ttm_write_combined;
@@ -1234,42 +1107,39 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
* Map the pages of a ttm_tt object to an address space visible
* to the underlying device.
*/
-static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev,
+static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
struct ttm_tt *ttm,
struct ttm_operation_ctx *ctx)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ struct ttm_pool *pool;
+ pgoff_t i;
+ int ret;
/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
- if (gtt && gtt->userptr) {
+ if (gtt->userptr) {
ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg)
return -ENOMEM;
-
- ttm->page_flags |= TTM_PAGE_FLAG_SG;
return 0;
}
- if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
- if (!ttm->sg) {
- struct dma_buf_attachment *attach;
- struct sg_table *sgt;
-
- attach = gtt->gobj->import_attach;
- sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
- if (IS_ERR(sgt))
- return PTR_ERR(sgt);
+ if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return 0;
- ttm->sg = sgt;
- }
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+ ret = ttm_pool_alloc(pool, ttm, ctx);
+ if (ret)
+ return ret;
- drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
- ttm->num_pages);
- return 0;
- }
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i]->mapping = bdev->dev_mapping;
- return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+ return 0;
}
/*
@@ -1278,33 +1148,57 @@ static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev,
* Unmaps pages of a ttm_tt object from the device address space and
* unpopulates the page array backing it.
*/
-static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev,
+static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev;
+ struct ttm_pool *pool;
+ pgoff_t i;
+
+ amdgpu_ttm_backend_unbind(bdev, ttm);
- if (gtt && gtt->userptr) {
+ if (gtt->userptr) {
amdgpu_ttm_tt_set_user_pages(ttm, NULL);
kfree(ttm->sg);
- ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
- return;
- }
-
- if (ttm->sg && gtt->gobj->import_attach) {
- struct dma_buf_attachment *attach;
-
- attach = gtt->gobj->import_attach;
- dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
ttm->sg = NULL;
return;
}
- if (ttm->page_flags & TTM_PAGE_FLAG_SG)
+ if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
return;
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i]->mapping = NULL;
+
adev = amdgpu_ttm_adev(bdev);
- return ttm_pool_free(&adev->mman.bdev.pool, ttm);
+
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+
+ return ttm_pool_free(pool, ttm);
+}
+
+/**
+ * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
+ * task
+ *
+ * @tbo: The ttm_buffer_object that contains the userptr
+ * @user_addr: The returned value
+ */
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr)
+{
+ struct amdgpu_ttm_tt *gtt;
+
+ if (!tbo->ttm)
+ return -EINVAL;
+
+ gtt = (void *)tbo->ttm;
+ *user_addr = gtt->userptr;
+ return 0;
}
/**
@@ -1315,8 +1209,9 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev,
* @addr: The address in the current tasks VM space to use
* @flags: Requirements of userptr object.
*
- * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
- * to current task
+ * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
+ * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
+ * initialize GPU VM for a KFD process.
*/
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
uint64_t addr, uint32_t flags)
@@ -1330,7 +1225,10 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
return -ENOMEM;
}
- gtt = (void *)bo->ttm;
+ /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
+ bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
+
+ gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
gtt->userptr = addr;
gtt->userflags = flags;
@@ -1347,7 +1245,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
*/
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return NULL;
@@ -1364,9 +1262,9 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
*
*/
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
- unsigned long end)
+ unsigned long end, unsigned long *userptr)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1379,6 +1277,8 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
+ if (userptr)
+ *userptr = gtt->userptr;
return true;
}
@@ -1387,7 +1287,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
*/
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL || !gtt->userptr)
return false;
@@ -1400,7 +1300,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
*/
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return false;
@@ -1423,13 +1323,20 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
if (mem && mem->mem_type != TTM_PL_SYSTEM)
flags |= AMDGPU_PTE_VALID;
- if (mem && mem->mem_type == TTM_PL_TT) {
+ if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_DOORBELL ||
+ mem->mem_type == AMDGPU_PL_PREEMPT ||
+ mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
flags |= AMDGPU_PTE_SYSTEM;
- if (ttm->caching == ttm_cached)
+ if (ttm && ttm->caching == ttm_cached)
flags |= AMDGPU_PTE_SNOOPED;
}
+ if (mem && mem->mem_type == TTM_PL_VRAM &&
+ mem->bus.caching == ttm_cached)
+ flags |= AMDGPU_PTE_SNOOPED;
+
return flags;
}
@@ -1468,11 +1375,15 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
{
- unsigned long num_pages = bo->mem.num_pages;
- struct drm_mm_node *node = bo->mem.mm_node;
- struct dma_resv_list *flist;
+ struct dma_resv_iter resv_cursor;
struct dma_fence *f;
- int i;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return ttm_bo_eviction_valuable(bo, place);
+
+ /* Swapout? */
+ if (bo->resource->mem_type == TTM_PL_SYSTEM)
+ return true;
if (bo->type == ttm_bo_type_kernel &&
!amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
@@ -1482,40 +1393,125 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
* If true, then return false as any KFD process needs all its BOs to
* be resident to run successfully
*/
- flist = dma_resv_get_list(bo->base.resv);
- if (flist) {
- for (i = 0; i < flist->shared_count; ++i) {
- f = rcu_dereference_protected(flist->shared[i],
- dma_resv_held(bo->base.resv));
- if (amdkfd_fence_check_mm(f, current->mm))
- return false;
- }
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP, f) {
+ if (amdkfd_fence_check_mm(f, current->mm) &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
}
- switch (bo->mem.mem_type) {
- case TTM_PL_TT:
- if (amdgpu_bo_is_amdgpu_bo(bo) &&
- amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
- return false;
- return true;
+ /* Preemptible BOs don't own system resources managed by the
+ * driver (pages, VRAM, GART space). They point to resources
+ * owned by someone else (e.g. pageable memory in user mode
+ * or a DMABuf). They are used in a preemptible context so we
+ * can guarantee no deadlocks and good QoS in case of MMU
+ * notifiers or DMABuf move notifiers from the resource owner.
+ */
+ if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
+ return false;
- case TTM_PL_VRAM:
- /* Check each drm MM node individually */
- while (num_pages) {
- if (place->fpfn < (node->start + node->size) &&
- !(place->lpfn && place->lpfn <= node->start))
- return true;
-
- num_pages -= node->size;
- ++node;
- }
+ if (bo->resource->mem_type == TTM_PL_TT &&
+ amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
return false;
- default:
- break;
+ return ttm_bo_eviction_valuable(bo, place);
+}
+
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ while (size) {
+ uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+ uint64_t bytes = 4 - (pos & 0x3);
+ uint32_t shift = (pos & 0x3) * 8;
+ uint32_t mask = 0xffffffff << shift;
+ uint32_t value = 0;
+
+ if (size < bytes) {
+ mask &= 0xffffffff >> (bytes - size) * 8;
+ bytes = size;
+ }
+
+ if (mask != 0xffffffff) {
+ amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
+ if (write) {
+ value &= ~mask;
+ value |= (*(uint32_t *)buf << shift) & mask;
+ amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
+ } else {
+ value = (value & mask) >> shift;
+ memcpy(buf, &value, bytes);
+ }
+ } else {
+ amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
+ }
+
+ pos += bytes;
+ buf += bytes;
+ size -= bytes;
}
+}
- return ttm_bo_eviction_valuable(bo, place);
+static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf,
+ int len, int write)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct amdgpu_res_cursor src_mm;
+ struct amdgpu_job *job;
+ struct dma_fence *fence;
+ uint64_t src_addr, dst_addr;
+ unsigned int num_dw;
+ int r, idx;
+
+ if (len != PAGE_SIZE)
+ return -EINVAL;
+
+ if (!adev->mman.sdma_access_ptr)
+ return -EACCES;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ if (write)
+ memcpy(adev->mman.sdma_access_ptr, buf, len);
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+ &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA);
+ if (r)
+ goto out;
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
+ src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
+ src_mm.start;
+ dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
+ if (write)
+ swap(src_addr, dst_addr);
+
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+ PAGE_SIZE, 0);
+
+ amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+ r = -ETIMEDOUT;
+ dma_fence_put(fence);
+
+ if (!(r || write))
+ memcpy(buf, adev->mman.sdma_access_ptr, len);
+out:
+ drm_dev_exit(idx);
+ return r;
}
/**
@@ -1531,67 +1527,38 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
* access for debugging purposes.
*/
static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
- unsigned long offset,
- void *buf, int len, int write)
+ unsigned long offset, void *buf, int len,
+ int write)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
- struct drm_mm_node *nodes;
- uint32_t value = 0;
+ struct amdgpu_res_cursor cursor;
int ret = 0;
- uint64_t pos;
- unsigned long flags;
- if (bo->mem.mem_type != TTM_PL_VRAM)
+ if (bo->resource->mem_type != TTM_PL_VRAM)
return -EIO;
- pos = offset;
- nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos);
- pos += (nodes->start << PAGE_SHIFT);
-
- while (len && pos < adev->gmc.mc_vram_size) {
- uint64_t aligned_pos = pos & ~(uint64_t)3;
- uint64_t bytes = 4 - (pos & 3);
- uint32_t shift = (pos & 3) * 8;
- uint32_t mask = 0xffffffff << shift;
-
- if (len < bytes) {
- mask &= 0xffffffff >> (bytes - len) * 8;
- bytes = len;
+ if (amdgpu_device_has_timeouts_enabled(adev) &&
+ !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
+ return len;
+
+ amdgpu_res_first(bo->resource, offset, len, &cursor);
+ while (cursor.remaining) {
+ size_t count, size = cursor.size;
+ loff_t pos = cursor.start;
+
+ count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+ size -= count;
+ if (size) {
+ /* using MM to access rest vram and handle un-aligned address */
+ pos += count;
+ buf += count;
+ amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
}
- if (mask != 0xffffffff) {
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
- WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
- if (!write || mask != 0xffffffff)
- value = RREG32_NO_KIQ(mmMM_DATA);
- if (write) {
- value &= ~mask;
- value |= (*(uint32_t *)buf << shift) & mask;
- WREG32_NO_KIQ(mmMM_DATA, value);
- }
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
- if (!write) {
- value = (value & mask) >> shift;
- memcpy(buf, &value, bytes);
- }
- } else {
- bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
- bytes = min(bytes - pos, (uint64_t)len & ~0x3ull);
-
- amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
- bytes, write);
- }
-
- ret += bytes;
- buf = (uint8_t *)buf + bytes;
- pos += bytes;
- len -= bytes;
- if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
- ++nodes;
- pos = (nodes->start << PAGE_SHIFT);
- }
+ ret += cursor.size;
+ buf += cursor.size;
+ amdgpu_res_next(&cursor, cursor.size);
}
return ret;
@@ -1603,7 +1570,7 @@ amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
amdgpu_bo_move_notify(bo, false, NULL);
}
-static struct ttm_bo_driver amdgpu_bo_driver = {
+static struct ttm_device_funcs amdgpu_bo_driver = {
.ttm_tt_create = &amdgpu_ttm_tt_create,
.ttm_tt_populate = &amdgpu_ttm_tt_populate,
.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
@@ -1611,13 +1578,11 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
.evict_flags = &amdgpu_evict_flags,
.move = &amdgpu_bo_move,
- .verify_access = &amdgpu_verify_access,
.delete_mem_notify = &amdgpu_bo_delete_mem_notify,
.release_notify = &amdgpu_bo_release_notify,
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
.access_memory = &amdgpu_ttm_access_memory,
- .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
};
/*
@@ -1636,6 +1601,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
NULL, &adev->mman.fw_vram_usage_va);
}
+/*
+ * Driver Reservation functions
+ */
+/**
+ * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free drv reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
+ NULL,
+ &adev->mman.drv_vram_usage_va);
+}
+
/**
* amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
*
@@ -1657,11 +1639,35 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
return amdgpu_bo_create_kernel_at(adev,
adev->mman.fw_vram_usage_start_offset,
adev->mman.fw_vram_usage_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.fw_vram_usage_reserved_bo,
&adev->mman.fw_vram_usage_va);
}
+/**
+ * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from drv.
+ */
+static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
+{
+ u64 vram_size = adev->gmc.visible_vram_size;
+
+ adev->mman.drv_vram_usage_va = NULL;
+ adev->mman.drv_vram_usage_reserved_bo = NULL;
+
+ if (adev->mman.drv_vram_usage_size == 0 ||
+ adev->mman.drv_vram_usage_size > vram_size)
+ return 0;
+
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->mman.drv_vram_usage_start_offset,
+ adev->mman.drv_vram_usage_size,
+ &adev->mman.drv_vram_usage_reserved_bo,
+ &adev->mman.drv_vram_usage_va);
+}
+
/*
* Memoy training reservation functions
*/
@@ -1684,19 +1690,20 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0;
}
-static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+ uint32_t reserve_size)
{
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
memset(ctx, 0, sizeof(*ctx));
ctx->c2p_train_data_offset =
- ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
+ ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
ctx->p2c_train_data_offset =
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size =
GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
-
+
DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
ctx->train_data_size,
ctx->p2c_train_data_offset,
@@ -1709,16 +1716,14 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{
- int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
bool mem_train_support = false;
+ uint32_t reserve_size = 0;
+ int ret;
- if (!amdgpu_sriov_vf(adev)) {
- ret = amdgpu_mem_train_support(adev);
- if (ret == 1)
+ if (adev->bios && !amdgpu_sriov_vf(adev)) {
+ if (amdgpu_atomfirmware_mem_training_supported(adev))
mem_train_support = true;
- else if (ret == -1)
- return -EINVAL;
else
DRM_DEBUG("memory training does not support!\n");
}
@@ -1730,43 +1735,135 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
* Otherwise, fallback to legacy approach to check and reserve tmr block for ip
* discovery data and G6 memory training data respectively
*/
- adev->mman.discovery_tmr_size =
- amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
- if (!adev->mman.discovery_tmr_size)
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
+ if (adev->bios)
+ reserve_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+
+ if (!adev->bios &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)))
+ reserve_size = max(reserve_size, (uint32_t)280 << 20);
+ else if (!reserve_size)
+ reserve_size = DISCOVERY_TMR_OFFSET;
if (mem_train_support) {
/* reserve vram for mem train according to TMR location */
- amdgpu_ttm_training_data_block_init(adev);
+ amdgpu_ttm_training_data_block_init(adev, reserve_size);
ret = amdgpu_bo_create_kernel_at(adev,
- ctx->c2p_train_data_offset,
- ctx->train_data_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &ctx->c2p_bo,
- NULL);
+ ctx->c2p_train_data_offset,
+ ctx->train_data_size,
+ &ctx->c2p_bo,
+ NULL);
if (ret) {
- DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret);
amdgpu_ttm_training_reserve_vram_fini(adev);
return ret;
}
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
}
- ret = amdgpu_bo_create_kernel_at(adev,
- adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
- adev->mman.discovery_tmr_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->mman.discovery_memory,
- NULL);
+ ret = amdgpu_bo_create_kernel_at(
+ adev, adev->gmc.real_vram_size - reserve_size, reserve_size,
+ &adev->mman.fw_reserved_memory, NULL);
if (ret) {
- DRM_ERROR("alloc tmr failed(%d)!\n", ret);
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+ dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
return ret;
}
return 0;
}
+static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
+ return 0;
+
+ adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
+ sizeof(*adev->mman.ttm_pools),
+ GFP_KERNEL);
+ if (!adev->mman.ttm_pools)
+ return -ENOMEM;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
+ ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
+ adev->gmc.mem_partitions[i].numa.node,
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
+ }
+ return 0;
+}
+
+static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
+ return;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++)
+ ttm_pool_fini(&adev->mman.ttm_pools[i]);
+
+ kfree(adev->mman.ttm_pools);
+ adev->mman.ttm_pools = NULL;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the
+ * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host
+ * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
+ * GEM object (amdgpu_bo_create).
+ *
+ * Return:
+ * * 0 on success or intentional skip (feature not present/unsupported)
+ * * negative errno on allocation failure
+ */
+static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo_param bp;
+ int r;
+
+ /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */
+ if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE)
+ return 0;
+
+ memset(&bp, 0, sizeof(bp));
+
+ /* Create exactly one GEM BO in the MMIO_REMAP domain. */
+ bp.type = ttm_bo_type_device; /* userspace-mappable GEM */
+ bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP;
+ bp.flags = 0;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Frees the kernel-owned MMIO_REMAP BO if it was allocated by
+ * amdgpu_ttm_mmio_remap_bo_init().
+ */
+static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_unref(&adev->rmmio_remap.bo);
+ adev->rmmio_remap.bo = NULL;
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1780,40 +1877,56 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
{
uint64_t gtt_size;
int r;
- u64 vis_vram_limit;
mutex_init(&adev->mman.gtt_window_lock);
+ dma_set_max_seg_size(adev->dev, UINT_MAX);
/* No others user of address space so set it to 0 */
- r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
+ r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
adev_to_drm(adev)->anon_inode->i_mapping,
adev_to_drm(adev)->vma_offset_manager,
- adev->need_swiotlb,
- dma_addressing_limited(adev->dev));
+ (adev->need_swiotlb ?
+ TTM_ALLOCATION_POOL_USE_DMA_ALLOC : 0) |
+ (dma_addressing_limited(adev->dev) ?
+ TTM_ALLOCATION_POOL_USE_DMA32 : 0) |
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
if (r) {
- DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+ dev_err(adev->dev,
+ "failed initializing buffer object driver(%d).\n", r);
return r;
}
- adev->mman.initialized = true;
- /* Initialize VRAM pool with all of VRAM divided into pages */
- r = amdgpu_vram_mgr_init(adev);
+ r = amdgpu_ttm_pools_init(adev);
if (r) {
- DRM_ERROR("Failed initializing VRAM heap.\n");
+ dev_err(adev->dev, "failed to init ttm pools(%d).\n", r);
return r;
}
+ adev->mman.initialized = true;
- /* Reduce size of CPU-visible VRAM if requested */
- vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
- if (amdgpu_vis_vram_limit > 0 &&
- vis_vram_limit <= adev->gmc.visible_vram_size)
- adev->gmc.visible_vram_size = vis_vram_limit;
+ if (!adev->gmc.is_app_apu) {
+ /* Initialize VRAM pool with all of VRAM divided into pages */
+ r = amdgpu_vram_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing VRAM heap.\n");
+ return r;
+ }
+ }
/* Change the size here instead of the init above so only lpfn is affected */
amdgpu_ttm_set_buffer_funcs_status(adev, false);
#ifdef CONFIG_64BIT
- adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
- adev->gmc.visible_vram_size);
+#ifdef CONFIG_X86
+ if (adev->gmc.xgmi.connected_to_cpu)
+ adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
+ adev->gmc.visible_vram_size);
+
+ else if (adev->gmc.is_app_apu)
+ DRM_DEBUG_DRIVER(
+ "No need to ioremap when real vram size is 0\n");
+ else
+#endif
+ adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+ adev->gmc.visible_vram_size);
#endif
/*
@@ -1821,16 +1934,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*place on the VRAM, so reserve it early.
*/
r = amdgpu_ttm_fw_reserve_vram_init(adev);
- if (r) {
+ if (r)
+ return r;
+
+ /*
+ * The reserved VRAM for the driver must be pinned to a specific
+ * location in VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_drv_reserve_vram_init(adev);
+ if (r)
return r;
- }
/*
- * only NAVI10 and onwards ASIC support for IP discovery.
- * If IP discovery enabled, a block of memory should be
- * reserved for IP discovey.
+ * only NAVI10 and later ASICs support IP discovery.
+ * If IP discovery is enabled, a block of memory should be
+ * reserved for it.
*/
- if (adev->mman.discovery_bin) {
+ if (adev->discovery.reserve_tmr) {
r = amdgpu_ttm_reserve_tmr(adev);
if (r)
return r;
@@ -1839,64 +1959,125 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* allocate memory as required for VGA
* This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS
- * and driver. */
- r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->mman.stolen_vga_memory,
- NULL);
- if (r)
- return r;
- r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
- adev->mman.stolen_extended_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->mman.stolen_extended_memory,
- NULL);
- if (r)
- return r;
+ * and driver.
+ */
+ if (!adev->gmc.is_app_apu) {
+ r = amdgpu_bo_create_kernel_at(adev, 0,
+ adev->mman.stolen_vga_size,
+ &adev->mman.stolen_vga_memory,
+ NULL);
+ if (r)
+ return r;
- DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
- (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
+ r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+ adev->mman.stolen_extended_size,
+ &adev->mman.stolen_extended_memory,
+ NULL);
- /* Compute GTT size, either bsaed on 3/4th the size of RAM size
- * or whatever the user passed on module init */
- if (amdgpu_gtt_size == -1) {
- struct sysinfo si;
+ if (r)
+ return r;
- si_meminfo(&si);
- gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
- adev->gmc.mc_vram_size),
- ((uint64_t)si.totalram * si.mem_unit * 3/4));
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->mman.stolen_reserved_offset,
+ adev->mman.stolen_reserved_size,
+ &adev->mman.stolen_reserved_memory,
+ NULL);
+ if (r)
+ return r;
+ } else {
+ DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
+ }
+
+ dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n",
+ (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
+
+ /* Compute GTT size, either based on TTM limit
+ * or whatever the user passed on module init.
+ */
+ gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+ if (amdgpu_gtt_size != -1) {
+ uint64_t configured_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ drm_warn(&adev->ddev,
+ "Configuring gttsize via module parameter is deprecated, please use ttm.pages_limit\n");
+ if (gtt_size != configured_size)
+ drm_warn(&adev->ddev,
+ "GTT size has been set as %llu but TTM size has been set as %llu, this is unusual\n",
+ configured_size, gtt_size);
+
+ gtt_size = configured_size;
}
- else
- gtt_size = (uint64_t)amdgpu_gtt_size << 20;
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
if (r) {
- DRM_ERROR("Failed initializing GTT heap.\n");
+ dev_err(adev->dev, "Failed initializing GTT heap.\n");
+ return r;
+ }
+ dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n",
+ (unsigned int)(gtt_size / (1024 * 1024)));
+
+ if (adev->flags & AMD_IS_APU) {
+ if (adev->gmc.real_vram_size < gtt_size)
+ adev->apu_prefer_gtt = true;
+ }
+
+ /* Initialize doorbell pool on PCI BAR */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing doorbell heap.\n");
+ return r;
+ }
+
+ /* Create a boorbell page for kernel usages */
+ r = amdgpu_doorbell_create_kernel_doorbells(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize kernel doorbells.\n");
+ return r;
+ }
+
+ /* Initialize MMIO-remap pool (single page 4K) */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n");
+ return r;
+ }
+
+ /* Allocate the singleton MMIO_REMAP BO (4K) if supported */
+ r = amdgpu_ttm_mmio_remap_bo_init(adev);
+ if (r)
+ return r;
+
+ /* Initialize preemptible memory pool */
+ r = amdgpu_preempt_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing PREEMPT heap.\n");
return r;
}
- DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
- (unsigned)(gtt_size / (1024 * 1024)));
/* Initialize various on-chip memory pools */
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
if (r) {
- DRM_ERROR("Failed initializing GDS heap.\n");
+ dev_err(adev->dev, "Failed initializing GDS heap.\n");
return r;
}
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
if (r) {
- DRM_ERROR("Failed initializing gws heap.\n");
+ dev_err(adev->dev, "Failed initializing gws heap.\n");
return r;
}
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
if (r) {
- DRM_ERROR("Failed initializing oa heap.\n");
+ dev_err(adev->dev, "Failed initializing oa heap.\n");
return r;
}
+ if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mman.sdma_access_bo, NULL,
+ &adev->mman.sdma_access_ptr))
+ DRM_WARN("Debug VRAM access will use slowpath MM access\n");
return 0;
}
@@ -1906,29 +2087,57 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*/
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
+ int idx;
+
if (!adev->mman.initialized)
return;
+ amdgpu_ttm_pools_fini(adev);
+
amdgpu_ttm_training_reserve_vram_fini(adev);
/* return the stolen vga memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
- /* return the IP Discovery TMR memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+ if (!adev->gmc.is_app_apu) {
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+ /* return the FW reserved memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL,
+ NULL);
+ if (adev->mman.stolen_reserved_size)
+ amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
+ NULL, NULL);
+ }
+ amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
+ &adev->mman.sdma_access_ptr);
+
+ amdgpu_ttm_mmio_remap_bo_fini(adev);
amdgpu_ttm_fw_reserve_vram_fini(adev);
+ amdgpu_ttm_drv_reserve_vram_fini(adev);
- if (adev->mman.aper_base_kaddr)
- iounmap(adev->mman.aper_base_kaddr);
- adev->mman.aper_base_kaddr = NULL;
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- amdgpu_vram_mgr_fini(adev);
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
+
+ drm_dev_exit(idx);
+ }
+
+ if (!adev->gmc.is_app_apu)
+ amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
+ amdgpu_preempt_mgr_fini(adev);
+ amdgpu_doorbell_fini(adev);
+
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
- ttm_bo_device_release(&adev->mman.bdev);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
+ ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
- DRM_INFO("amdgpu: ttm finalized\n");
+ dev_info(adev->dev, "amdgpu: ttm finalized\n");
}
/**
@@ -1947,7 +2156,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
int r;
if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
- adev->mman.buffer_funcs_enabled == enable)
+ adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
return;
if (enable) {
@@ -1956,18 +2165,32 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
ring = adev->mman.buffer_funcs_ring;
sched = &ring->sched;
- r = drm_sched_entity_init(&adev->mman.entity,
+ r = drm_sched_entity_init(&adev->mman.high_pr,
DRM_SCHED_PRIORITY_KERNEL, &sched,
1, NULL);
if (r) {
- DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
- r);
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
return;
}
+
+ r = drm_sched_entity_init(&adev->mman.low_pr,
+ DRM_SCHED_PRIORITY_NORMAL, &sched,
+ 1, NULL);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
+ goto error_free_entity;
+ }
} else {
- drm_sched_entity_destroy(&adev->mman.entity);
- dma_fence_put(man->move);
- man->move = NULL;
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+ drm_sched_entity_destroy(&adev->mman.low_pr);
+ /* Drop all the old fences since re-creating the scheduler entities
+ * will allocate new contexts.
+ */
+ ttm_resource_manager_cleanup(man);
}
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
@@ -1975,103 +2198,81 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
size = adev->gmc.real_vram_size;
else
size = adev->gmc.visible_vram_size;
- man->size = size >> PAGE_SHIFT;
+ man->size = size;
adev->mman.buffer_funcs_enabled = enable;
-}
-static vm_fault_t amdgpu_ttm_fault(struct vm_fault *vmf)
-{
- struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
- vm_fault_t ret;
+ return;
- ret = ttm_bo_vm_reserve(bo, vmf);
- if (ret)
- return ret;
-
- ret = amdgpu_bo_fault_reserve_notify(bo);
- if (ret)
- goto unlock;
-
- ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT, 1);
- if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
- return ret;
-
-unlock:
- dma_resv_unlock(bo->base.resv);
- return ret;
+error_free_entity:
+ drm_sched_entity_destroy(&adev->mman.high_pr);
}
-static struct vm_operations_struct amdgpu_ttm_vm_ops = {
- .fault = amdgpu_ttm_fault,
- .open = ttm_bo_vm_open,
- .close = ttm_bo_vm_close,
- .access = ttm_bo_vm_access
-};
-
-int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
+static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+ bool direct_submit,
+ unsigned int num_dw,
+ struct dma_resv *resv,
+ bool vm_needs_flush,
+ struct amdgpu_job **job,
+ bool delayed, u64 k_job_id)
{
- struct drm_file *file_priv = filp->private_data;
- struct amdgpu_device *adev = drm_to_adev(file_priv->minor->dev);
+ enum amdgpu_ib_pool_type pool = direct_submit ?
+ AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED;
int r;
-
- r = ttm_bo_mmap(filp, vma, &adev->mman.bdev);
- if (unlikely(r != 0))
+ struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
+ &adev->mman.high_pr;
+ r = amdgpu_job_alloc_with_ib(adev, entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4, pool, job, k_job_id);
+ if (r)
return r;
- vma->vm_ops = &amdgpu_ttm_vm_ops;
- return 0;
+ if (vm_needs_flush) {
+ (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
+ adev->gmc.pdb0_bo :
+ adev->gart.bo);
+ (*job)->vm_needs_flush = true;
+ }
+ if (!resv)
+ return 0;
+
+ return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
+ DMA_RESV_USAGE_BOOKKEEP);
}
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz)
+ bool vm_needs_flush, uint32_t copy_flags)
{
- enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED;
struct amdgpu_device *adev = ring->adev;
+ unsigned int num_loops, num_dw;
struct amdgpu_job *job;
-
uint32_t max_bytes;
- unsigned num_loops, num_dw;
- unsigned i;
+ unsigned int i;
int r;
- if (direct_submit && !ring->sched.ready) {
- DRM_ERROR("Trying to move memory with ring turned off.\n");
+ if (!direct_submit && !ring->sched.ready) {
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
return -EINVAL;
}
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
-
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
+ r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
+ resv, vm_needs_flush, &job, false,
+ AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
return r;
- if (vm_needs_flush) {
- job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
- job->vm_needs_flush = true;
- }
- if (resv) {
- r = amdgpu_sync_resv(adev, &job->sync, resv,
- AMDGPU_SYNC_ALWAYS,
- AMDGPU_FENCE_OWNER_UNDEFINED);
- if (r) {
- DRM_ERROR("sync failed (%d).\n", r);
- goto error_free;
- }
- }
-
for (i = 0; i < num_loops; i++) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes, tmz);
-
+ dst_offset, cur_size_in_bytes, copy_flags);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
@@ -2082,8 +2283,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
if (direct_submit)
r = amdgpu_job_submit_direct(job, ring, fence);
else
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+ *fence = amdgpu_job_submit(job);
if (r)
goto error_free;
@@ -2091,136 +2291,206 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
error_free:
amdgpu_job_free(job);
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev, "Error scheduling IBs (%d)\n", r);
return r;
}
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **fence)
+static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+ uint64_t dst_addr, uint32_t byte_count,
+ struct dma_resv *resv,
+ struct dma_fence **fence,
+ bool vm_needs_flush, bool delayed,
+ u64 k_job_id)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
- struct drm_mm_node *mm_node;
- unsigned long num_pages;
+ struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
-
struct amdgpu_job *job;
+ uint32_t max_bytes;
+ unsigned int i;
int r;
- if (!adev->mman.buffer_funcs_enabled) {
- DRM_ERROR("Trying to clear memory with ring turned off.\n");
- return -EINVAL;
- }
+ max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+ num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
+ r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
+ &job, delayed, k_job_id);
+ if (r)
+ return r;
- if (bo->tbo.mem.mem_type == TTM_PL_TT) {
- r = amdgpu_ttm_alloc_gart(&bo->tbo);
- if (r)
- return r;
- }
+ for (i = 0; i < num_loops; i++) {
+ uint32_t cur_size = min(byte_count, max_bytes);
- num_pages = bo->tbo.mem.num_pages;
- mm_node = bo->tbo.mem.mm_node;
- num_loops = 0;
- while (num_pages) {
- uint64_t byte_count = mm_node->size << PAGE_SHIFT;
+ amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
+ cur_size);
- num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes);
- num_pages -= mm_node->size;
- ++mm_node;
+ dst_addr += cur_size;
+ byte_count -= cur_size;
}
- num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
- /* for IB padding */
- num_dw += 64;
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+ *fence = amdgpu_job_submit(job);
+ return 0;
+}
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
- &job);
- if (r)
- return r;
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the memory buffer resource.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r = 0;
- if (resv) {
- r = amdgpu_sync_resv(adev, &job->sync, resv,
- AMDGPU_SYNC_ALWAYS,
- AMDGPU_FENCE_OWNER_UNDEFINED);
- if (r) {
- DRM_ERROR("sync failed (%d).\n", r);
- goto error_free;
- }
- }
+ if (!adev->mman.buffer_funcs_enabled)
+ return -EINVAL;
- num_pages = bo->tbo.mem.num_pages;
- mm_node = bo->tbo.mem.mm_node;
+ if (!fence)
+ return -EINVAL;
- while (num_pages) {
- uint64_t byte_count = mm_node->size << PAGE_SHIFT;
- uint64_t dst_addr;
+ *fence = dma_fence_get_stub();
- dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
- while (byte_count) {
- uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count,
- max_bytes);
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
- amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
- dst_addr, cur_size_in_bytes);
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (cursor.remaining) {
+ struct dma_fence *next = NULL;
+ u64 size;
- dst_addr += cur_size_in_bytes;
- byte_count -= cur_size_in_bytes;
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
}
- num_pages -= mm_node->size;
- ++mm_node;
- }
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- WARN_ON(job->ibs[0].length_dw > num_dw);
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, fence);
- if (r)
- goto error_free;
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
- return 0;
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true,
+ AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
+ }
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
-error_free:
- amdgpu_job_free(job);
return r;
}
-#if defined(CONFIG_DEBUG_FS)
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ bool delayed,
+ u64 k_job_id)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_res_cursor dst;
+ int r;
+
+ if (!adev->mman.buffer_funcs_enabled) {
+ dev_err(adev->dev,
+ "Trying to clear memory with ring turned off.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (dst.remaining) {
+ struct dma_fence *next;
+ uint64_t cur_size, to;
+
+ /* Never fill more than 256MiB at once to avoid timeouts */
+ cur_size = min(dst.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
+ 1, ring, false, &cur_size, &to);
+ if (r)
+ goto error;
+
+ r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
+ &next, true, delayed, k_job_id);
+ if (r)
+ goto error;
+
+ dma_fence_put(fence);
+ fence = next;
-static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
+ amdgpu_res_next(&dst, cur_size);
+ }
+error:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ if (f)
+ *f = dma_fence_get(fence);
+ dma_fence_put(fence);
+ return r;
+}
+
+/**
+ * amdgpu_ttm_evict_resources - evict memory buffers
+ * @adev: amdgpu device object
+ * @mem_type: evicted BO's memory type
+ *
+ * Evicts all @mem_type buffers on the lru list of the memory type.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
{
- struct drm_info_node *node = (struct drm_info_node *)m->private;
- unsigned ttm_pl = (uintptr_t)node->info_ent->data;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, ttm_pl);
- struct drm_printer p = drm_seq_file_printer(m);
+ struct ttm_resource_manager *man;
- man->func->debug(man, &p);
- return 0;
+ switch (mem_type) {
+ case TTM_PL_VRAM:
+ case TTM_PL_TT:
+ case AMDGPU_PL_GWS:
+ case AMDGPU_PL_GDS:
+ case AMDGPU_PL_OA:
+ man = ttm_manager_type(&adev->mman.bdev, mem_type);
+ break;
+ default:
+ dev_err(adev->dev, "Trying to evict invalid memory type\n");
+ return -EINVAL;
+ }
+
+ return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
}
-static int amdgpu_ttm_pool_debugfs(struct seq_file *m, void *data)
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{
- struct drm_info_node *node = (struct drm_info_node *)m->private;
- struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = m->private;
return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
}
-static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
- {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},
- {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},
- {"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},
- {"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},
- {"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA},
- {"ttm_page_pool", amdgpu_ttm_pool_debugfs, 0, NULL},
-};
+DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);
/*
* amdgpu_ttm_vram_read - Linear read access to VRAM
@@ -2276,7 +2546,6 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
return -ENXIO;
while (size) {
- unsigned long flags;
uint32_t value;
if (*pos >= adev->gmc.mc_vram_size)
@@ -2286,11 +2555,7 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
if (r)
return r;
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
- WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
- WREG32_NO_KIQ(mmMM_DATA, value);
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+ amdgpu_device_mm_access(adev, *pos, &value, 4, true);
result += 4;
buf += 4;
@@ -2308,58 +2573,6 @@ static const struct file_operations amdgpu_ttm_vram_fops = {
.llseek = default_llseek,
};
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
-
-/*
- * amdgpu_ttm_gtt_read - Linear read access to GTT memory
- */
-static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
- size_t size, loff_t *pos)
-{
- struct amdgpu_device *adev = file_inode(f)->i_private;
- ssize_t result = 0;
- int r;
-
- while (size) {
- loff_t p = *pos / PAGE_SIZE;
- unsigned off = *pos & ~PAGE_MASK;
- size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
- struct page *page;
- void *ptr;
-
- if (p >= adev->gart.num_cpu_pages)
- return result;
-
- page = adev->gart.pages[p];
- if (page) {
- ptr = kmap(page);
- ptr += off;
-
- r = copy_to_user(buf, ptr, cur_size);
- kunmap(adev->gart.pages[p]);
- } else
- r = clear_user(buf, cur_size);
-
- if (r)
- return -EFAULT;
-
- result += cur_size;
- buf += cur_size;
- *pos += cur_size;
- size -= cur_size;
- }
-
- return result;
-}
-
-static const struct file_operations amdgpu_ttm_gtt_fops = {
- .owner = THIS_MODULE,
- .read = amdgpu_ttm_gtt_read,
- .llseek = default_llseek
-};
-
-#endif
-
/*
* amdgpu_iomem_read - Virtual read access to GPU mapped memory
*
@@ -2386,7 +2599,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
struct page *p;
void *ptr;
- bytes = bytes < size ? bytes : size;
+ bytes = min(bytes, size);
/* Translate the bus address to a physical address. If
* the domain is NULL it means there is no IOMMU active
@@ -2402,9 +2615,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
if (p->mapping != adev->mman.bdev.dev_mapping)
return -EPERM;
- ptr = kmap(p);
+ ptr = kmap_local_page(p);
r = copy_to_user(buf, ptr + off, bytes);
- kunmap(p);
+ kunmap_local(ptr);
if (r)
return -EFAULT;
@@ -2441,7 +2654,7 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
struct page *p;
void *ptr;
- bytes = bytes < size ? bytes : size;
+ bytes = min(bytes, size);
addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
@@ -2453,9 +2666,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
if (p->mapping != adev->mman.bdev.dev_mapping)
return -EPERM;
- ptr = kmap(p);
+ ptr = kmap_local_page(p);
r = copy_from_user(ptr + off, buf, bytes);
- kunmap(p);
+ kunmap_local(ptr);
if (r)
return -EFAULT;
@@ -2474,46 +2687,35 @@ static const struct file_operations amdgpu_ttm_iomem_fops = {
.llseek = default_llseek
};
-static const struct {
- char *name;
- const struct file_operations *fops;
- int domain;
-} ttm_debugfs_entries[] = {
- { "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM },
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
- { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
#endif
- { "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM },
-};
-#endif
-
-int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
+void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
- unsigned count;
-
struct drm_minor *minor = adev_to_drm(adev)->primary;
- struct dentry *ent, *root = minor->debugfs_root;
-
- for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
- ent = debugfs_create_file(
- ttm_debugfs_entries[count].name,
- S_IFREG | S_IRUGO, root,
- adev,
- ttm_debugfs_entries[count].fops);
- if (IS_ERR(ent))
- return PTR_ERR(ent);
- if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
- i_size_write(ent->d_inode, adev->gmc.mc_vram_size);
- else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
- i_size_write(ent->d_inode, adev->gmc.gart_size);
- adev->mman.debugfs_entries[count] = ent;
- }
-
- count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
- return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
-#else
- return 0;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
+ &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
+ debugfs_create_file("amdgpu_iomem", 0444, root, adev,
+ &amdgpu_ttm_iomem_fops);
+ debugfs_create_file("ttm_page_pool", 0444, root, adev,
+ &amdgpu_ttm_page_pool_fops);
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_VRAM),
+ root, "amdgpu_vram_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_TT),
+ root, "amdgpu_gtt_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GDS),
+ root, "amdgpu_gds_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GWS),
+ root, "amdgpu_gws_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_OA),
+ root, "amdgpu_oa_mm");
+
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index d2987536d7cd..577ee04ce0bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,59 +26,52 @@
#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
-#include "amdgpu.h"
+#include <drm/ttm/ttm_placement.h>
+#include "amdgpu_vram_mgr.h"
+#include "amdgpu_hmm.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
+#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
+#define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5)
+#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
-#define AMDGPU_POISON 0xd0bed0be
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
-struct amdgpu_vram_reservation {
- struct list_head node;
- struct drm_mm_node mm_node;
-};
-
-struct amdgpu_vram_mgr {
- struct ttm_resource_manager manager;
- struct drm_mm mm;
- spinlock_t lock;
- struct list_head reservations_pending;
- struct list_head reserved_pages;
- atomic64_t usage;
- atomic64_t vis_usage;
-};
+struct hmm_range;
struct amdgpu_gtt_mgr {
struct ttm_resource_manager manager;
struct drm_mm mm;
spinlock_t lock;
- atomic64_t available;
};
struct amdgpu_mman {
- struct ttm_bo_device bdev;
+ struct ttm_device bdev;
+ struct ttm_pool *ttm_pools;
bool initialized;
void __iomem *aper_base_kaddr;
-#if defined(CONFIG_DEBUG_FS)
- struct dentry *debugfs_entries[8];
-#endif
-
/* buffer handling */
const struct amdgpu_buffer_funcs *buffer_funcs;
struct amdgpu_ring *buffer_funcs_ring;
bool buffer_funcs_enabled;
struct mutex gtt_window_lock;
- /* Scheduler entity for buffer moves */
- struct drm_sched_entity entity;
+ /* High priority scheduler entity for buffer moves */
+ struct drm_sched_entity high_pr;
+ /* Low priority scheduler entity for VRAM clearing */
+ struct drm_sched_entity low_pr;
struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr;
+ struct ttm_resource_manager preempt_mgr;
uint64_t stolen_vga_size;
struct amdgpu_bo *stolen_vga_memory;
@@ -86,16 +79,29 @@ struct amdgpu_mman {
struct amdgpu_bo *stolen_extended_memory;
bool keep_stolen_vga_memory;
- /* discovery */
- uint8_t *discovery_bin;
- uint32_t discovery_tmr_size;
- struct amdgpu_bo *discovery_memory;
+ struct amdgpu_bo *stolen_reserved_memory;
+ uint64_t stolen_reserved_offset;
+ uint64_t stolen_reserved_size;
+
+ /* fw reserved memory */
+ struct amdgpu_bo *fw_reserved_memory;
+ struct amdgpu_bo *fw_reserved_memory_extend;
/* firmware VRAM reservation */
u64 fw_vram_usage_start_offset;
u64 fw_vram_usage_size;
struct amdgpu_bo *fw_vram_usage_reserved_bo;
void *fw_vram_usage_va;
+
+ /* driver VRAM reservation */
+ u64 drv_vram_usage_start_offset;
+ u64 drv_vram_usage_size;
+ struct amdgpu_bo *drv_vram_usage_reserved_bo;
+ void *drv_vram_usage_va;
+
+ /* PAGE_SIZE'd BO for process memory r/w over SDMA. */
+ struct amdgpu_bo *sdma_access_bo;
+ void *sdma_access_ptr;
};
struct amdgpu_copy_mem {
@@ -104,80 +110,98 @@ struct amdgpu_copy_mem {
unsigned long offset;
};
+#define AMDGPU_COPY_FLAGS_TMZ (1 << 0)
+#define AMDGPU_COPY_FLAGS_READ_DECOMPRESSED (1 << 1)
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESSED (1 << 2)
+#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_SHIFT 3
+#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_MASK 0x03
+#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_SHIFT 5
+#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07
+#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8
+#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1
+
+#define AMDGPU_COPY_FLAGS_SET(field, value) \
+ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT)
+#define AMDGPU_COPY_FLAGS_GET(value, field) \
+ (((__u32)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & AMDGPU_COPY_FLAGS_##field##_MASK)
+
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
+int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
+void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
-uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man);
-int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
+void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
+
+uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
struct ttm_resource *mem,
+ u64 offset, u64 size,
struct device *dev,
enum dma_data_direction dir,
struct sg_table **sgt);
-void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
- struct device *dev,
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
enum dma_data_direction dir,
struct sg_table *sgt);
-uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);
-uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man);
-int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr);
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size);
-int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev);
+
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res);
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);
-
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz);
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f);
+ bool vm_needs_flush, uint32_t copy_flags);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
- struct dma_fence **fence);
+ struct dma_fence **fence,
+ bool delayed,
+ u64 k_job_id);
-int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
-int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages);
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct amdgpu_hmm_range *range);
#else
static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct page **pages)
+ struct amdgpu_hmm_range *range)
{
return -EPERM;
}
-static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
-{
- return false;
-}
#endif
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr);
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
uint64_t addr, uint32_t flags);
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
- unsigned long end);
+ unsigned long end, unsigned long *userptr);
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
@@ -185,7 +209,8 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem);
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_resource *mem);
+int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
-int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
+void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 1beb08af347f..e96f24e9ad57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -28,6 +28,13 @@
#include "amdgpu.h"
#include "amdgpu_ucode.h"
+#define AMDGPU_UCODE_NAME_MAX (128)
+
+static const struct kicker_device kicker_device_list[] = {
+ {0x744B, 0x00},
+ {0x7551, 0xC8}
+};
+
static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr)
{
DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
@@ -115,6 +122,12 @@ void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr)
le32_to_cpu(gfx_hdr->ucode_feature_version));
DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(gfx_hdr->jt_offset));
DRM_DEBUG("jt_size: %u\n", le32_to_cpu(gfx_hdr->jt_size));
+ } else if (version_major == 2) {
+ const struct gfx_firmware_header_v2_0 *gfx_hdr =
+ container_of(hdr, struct gfx_firmware_header_v2_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(gfx_hdr->ucode_feature_version));
} else {
DRM_ERROR("Unknown GFX ucode version: %u.%u\n", version_major, version_minor);
}
@@ -145,70 +158,138 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
} else if (version_major == 2) {
const struct rlc_firmware_header_v2_0 *rlc_hdr =
container_of(hdr, struct rlc_firmware_header_v2_0, header);
+ const struct rlc_firmware_header_v2_1 *rlc_hdr_v2_1 =
+ container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ const struct rlc_firmware_header_v2_2 *rlc_hdr_v2_2 =
+ container_of(rlc_hdr_v2_1, struct rlc_firmware_header_v2_2, v2_1);
+ const struct rlc_firmware_header_v2_3 *rlc_hdr_v2_3 =
+ container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2);
+ const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 =
+ container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3);
- DRM_DEBUG("ucode_feature_version: %u\n",
- le32_to_cpu(rlc_hdr->ucode_feature_version));
- DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset));
- DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size));
- DRM_DEBUG("save_and_restore_offset: %u\n",
- le32_to_cpu(rlc_hdr->save_and_restore_offset));
- DRM_DEBUG("clear_state_descriptor_offset: %u\n",
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
- DRM_DEBUG("avail_scratch_ram_locations: %u\n",
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
- DRM_DEBUG("reg_restore_list_size: %u\n",
- le32_to_cpu(rlc_hdr->reg_restore_list_size));
- DRM_DEBUG("reg_list_format_start: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_start));
- DRM_DEBUG("reg_list_format_separate_start: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start));
- DRM_DEBUG("starting_offsets_start: %u\n",
- le32_to_cpu(rlc_hdr->starting_offsets_start));
- DRM_DEBUG("reg_list_format_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes));
- DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- DRM_DEBUG("reg_list_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_size_bytes));
- DRM_DEBUG("reg_list_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes));
- DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
- DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
- DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
- if (version_minor == 1) {
- const struct rlc_firmware_header_v2_1 *v2_1 =
- container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ switch (version_minor) {
+ case 0:
+ /* rlc_hdr v2_0 */
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr->ucode_feature_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size));
+ DRM_DEBUG("save_and_restore_offset: %u\n",
+ le32_to_cpu(rlc_hdr->save_and_restore_offset));
+ DRM_DEBUG("clear_state_descriptor_offset: %u\n",
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
+ DRM_DEBUG("avail_scratch_ram_locations: %u\n",
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
+ DRM_DEBUG("reg_restore_list_size: %u\n",
+ le32_to_cpu(rlc_hdr->reg_restore_list_size));
+ DRM_DEBUG("reg_list_format_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_start));
+ DRM_DEBUG("reg_list_format_separate_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start));
+ DRM_DEBUG("starting_offsets_start: %u\n",
+ le32_to_cpu(rlc_hdr->starting_offsets_start));
+ DRM_DEBUG("reg_list_format_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes));
+ DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ DRM_DEBUG("reg_list_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes));
+ DRM_DEBUG("reg_list_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes));
+ DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
+ DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+ DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+ break;
+ case 1:
+ /* rlc_hdr v2_1 */
DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
- le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+ le32_to_cpu(rlc_hdr_v2_1->reg_list_format_direct_reg_list_length));
DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_ucode_ver));
DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_feature_ver));
DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_size_bytes));
DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_offset_bytes));
DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_ucode_ver));
DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_feature_ver));
DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_size_bytes));
DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_offset_bytes));
DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_ucode_ver));
DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_feature_ver));
DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_size_bytes));
DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_offset_bytes));
+ break;
+ case 2:
+ /* rlc_hdr v2_2 */
+ DRM_DEBUG("rlc_iram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_size_bytes));
+ DRM_DEBUG("rlc_iram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_offset_bytes));
+ DRM_DEBUG("rlc_dram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_size_bytes));
+ DRM_DEBUG("rlc_dram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_offset_bytes));
+ break;
+ case 3:
+ /* rlc_hdr v2_3 */
+ DRM_DEBUG("rlcp_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_version));
+ DRM_DEBUG("rlcp_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_feature_version));
+ DRM_DEBUG("rlcp_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_size_bytes));
+ DRM_DEBUG("rlcp_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_offset_bytes));
+ DRM_DEBUG("rlcv_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_version));
+ DRM_DEBUG("rlcv_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_feature_version));
+ DRM_DEBUG("rlcv_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_size_bytes));
+ DRM_DEBUG("rlcv_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_offset_bytes));
+ break;
+ case 4:
+ /* rlc_hdr v2_4 */
+ DRM_DEBUG("global_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("global_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes));
+ break;
+ default:
+ DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor);
+ break;
}
} else {
DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
@@ -238,6 +319,23 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
container_of(sdma_hdr, struct sdma_firmware_header_v1_1, v1_0);
DRM_DEBUG("digest_size: %u\n", le32_to_cpu(sdma_v1_1_hdr->digest_size));
}
+ } else if (version_major == 2) {
+ const struct sdma_firmware_header_v2_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v2_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
+ DRM_DEBUG("ctx_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_offset));
+ DRM_DEBUG("ctx_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_size));
+ DRM_DEBUG("ctl_ucode_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_ucode_offset));
+ DRM_DEBUG("ctl_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_offset));
+ DRM_DEBUG("ctl_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_size));
+ } else if (version_major == 3) {
+ const struct sdma_firmware_header_v3_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v3_0, header);
+
+ DRM_DEBUG("ucode_reversion: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
} else {
DRM_ERROR("Unknown SDMA ucode version: %u.%u\n",
version_major, version_minor);
@@ -248,6 +346,8 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
{
uint16_t version_major = le16_to_cpu(hdr->header_version_major);
uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+ uint32_t fw_index;
+ const struct psp_fw_bin_desc *desc;
DRM_DEBUG("PSP\n");
amdgpu_ucode_print_common_hdr(hdr);
@@ -257,36 +357,36 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
container_of(hdr, struct psp_firmware_header_v1_0, header);
DRM_DEBUG("ucode_feature_version: %u\n",
- le32_to_cpu(psp_hdr->ucode_feature_version));
+ le32_to_cpu(psp_hdr->sos.fw_version));
DRM_DEBUG("sos_offset_bytes: %u\n",
- le32_to_cpu(psp_hdr->sos_offset_bytes));
+ le32_to_cpu(psp_hdr->sos.offset_bytes));
DRM_DEBUG("sos_size_bytes: %u\n",
- le32_to_cpu(psp_hdr->sos_size_bytes));
+ le32_to_cpu(psp_hdr->sos.size_bytes));
if (version_minor == 1) {
const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
DRM_DEBUG("toc_header_version: %u\n",
- le32_to_cpu(psp_hdr_v1_1->toc_header_version));
+ le32_to_cpu(psp_hdr_v1_1->toc.fw_version));
DRM_DEBUG("toc_offset_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_1->toc_offset_bytes));
+ le32_to_cpu(psp_hdr_v1_1->toc.offset_bytes));
DRM_DEBUG("toc_size_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_1->toc_size_bytes));
+ le32_to_cpu(psp_hdr_v1_1->toc.size_bytes));
DRM_DEBUG("kdb_header_version: %u\n",
- le32_to_cpu(psp_hdr_v1_1->kdb_header_version));
+ le32_to_cpu(psp_hdr_v1_1->kdb.fw_version));
DRM_DEBUG("kdb_offset_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_1->kdb_offset_bytes));
+ le32_to_cpu(psp_hdr_v1_1->kdb.offset_bytes));
DRM_DEBUG("kdb_size_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes));
+ le32_to_cpu(psp_hdr_v1_1->kdb.size_bytes));
}
if (version_minor == 2) {
const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 =
container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0);
DRM_DEBUG("kdb_header_version: %u\n",
- le32_to_cpu(psp_hdr_v1_2->kdb_header_version));
+ le32_to_cpu(psp_hdr_v1_2->kdb.fw_version));
DRM_DEBUG("kdb_offset_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes));
+ le32_to_cpu(psp_hdr_v1_2->kdb.offset_bytes));
DRM_DEBUG("kdb_size_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes));
+ le32_to_cpu(psp_hdr_v1_2->kdb.size_bytes));
}
if (version_minor == 3) {
const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
@@ -294,23 +394,94 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
const struct psp_firmware_header_v1_3 *psp_hdr_v1_3 =
container_of(psp_hdr_v1_1, struct psp_firmware_header_v1_3, v1_1);
DRM_DEBUG("toc_header_version: %u\n",
- le32_to_cpu(psp_hdr_v1_3->v1_1.toc_header_version));
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.fw_version));
DRM_DEBUG("toc_offset_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_3->v1_1.toc_offset_bytes));
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.offset_bytes));
DRM_DEBUG("toc_size_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_3->v1_1.toc_size_bytes));
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.size_bytes));
DRM_DEBUG("kdb_header_version: %u\n",
- le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_header_version));
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.fw_version));
DRM_DEBUG("kdb_offset_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_offset_bytes));
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.offset_bytes));
DRM_DEBUG("kdb_size_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_size_bytes));
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.size_bytes));
DRM_DEBUG("spl_header_version: %u\n",
- le32_to_cpu(psp_hdr_v1_3->spl_header_version));
+ le32_to_cpu(psp_hdr_v1_3->spl.fw_version));
DRM_DEBUG("spl_offset_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_3->spl_offset_bytes));
+ le32_to_cpu(psp_hdr_v1_3->spl.offset_bytes));
DRM_DEBUG("spl_size_bytes: %u\n",
- le32_to_cpu(psp_hdr_v1_3->spl_size_bytes));
+ le32_to_cpu(psp_hdr_v1_3->spl.size_bytes));
+ }
+ } else if (version_major == 2) {
+ const struct psp_firmware_header_v2_0 *psp_hdr_v2_0 =
+ container_of(hdr, struct psp_firmware_header_v2_0, header);
+ for (fw_index = 0; fw_index < le32_to_cpu(psp_hdr_v2_0->psp_fw_bin_count); fw_index++) {
+ desc = &(psp_hdr_v2_0->psp_fw_bin[fw_index]);
+ switch (desc->fw_type) {
+ case PSP_FW_TYPE_PSP_SOS:
+ DRM_DEBUG("psp_sos_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_sos_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SYS_DRV:
+ DRM_DEBUG("psp_sys_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_sys_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_KDB:
+ DRM_DEBUG("psp_kdb_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_kdb_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_TOC:
+ DRM_DEBUG("psp_toc_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_toc_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SPL:
+ DRM_DEBUG("psp_spl_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_spl_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_RL:
+ DRM_DEBUG("psp_rl_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_rl_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SOC_DRV:
+ DRM_DEBUG("psp_soc_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_soc_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_INTF_DRV:
+ DRM_DEBUG("psp_intf_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_intf_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_DBG_DRV:
+ DRM_DEBUG("psp_dbg_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_dbg_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ DRM_DEBUG("psp_ras_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_ras_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ default:
+ DRM_DEBUG("Unsupported PSP fw type: %d\n", desc->fw_type);
+ break;
+ }
}
} else {
DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
@@ -339,7 +510,7 @@ void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr)
}
}
-int amdgpu_ucode_validate(const struct firmware *fw)
+static int amdgpu_ucode_validate(const struct firmware *fw)
{
const struct common_firmware_header *hdr =
(const struct common_firmware_header *)fw->data;
@@ -355,8 +526,8 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
{
if ((hdr->common.header_version_major == hdr_major) &&
(hdr->common.header_version_minor == hdr_minor))
- return false;
- return true;
+ return true;
+ return false;
}
enum amdgpu_firmware_load_type
@@ -389,39 +560,191 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS12:
case CHIP_VEGAM:
return AMDGPU_FW_LOAD_SMU;
- case CHIP_VEGA10:
- case CHIP_RAVEN:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ case CHIP_CYAN_SKILLFISH:
+ if (!(load_type &&
+ adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2))
+ return AMDGPU_FW_LOAD_DIRECT;
+ else
+ return AMDGPU_FW_LOAD_PSP;
+ default:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
+ else if (load_type == 3)
+ return AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO;
else
return AMDGPU_FW_LOAD_PSP;
+ }
+}
+
+const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
+{
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ return "SDMA0";
+ case AMDGPU_UCODE_ID_SDMA1:
+ return "SDMA1";
+ case AMDGPU_UCODE_ID_SDMA2:
+ return "SDMA2";
+ case AMDGPU_UCODE_ID_SDMA3:
+ return "SDMA3";
+ case AMDGPU_UCODE_ID_SDMA4:
+ return "SDMA4";
+ case AMDGPU_UCODE_ID_SDMA5:
+ return "SDMA5";
+ case AMDGPU_UCODE_ID_SDMA6:
+ return "SDMA6";
+ case AMDGPU_UCODE_ID_SDMA7:
+ return "SDMA7";
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ return "SDMA_CTX";
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ return "SDMA_CTL";
+ case AMDGPU_UCODE_ID_CP_CE:
+ return "CP_CE";
+ case AMDGPU_UCODE_ID_CP_PFP:
+ return "CP_PFP";
+ case AMDGPU_UCODE_ID_CP_ME:
+ return "CP_ME";
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ return "CP_MEC1";
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ return "CP_MEC1_JT";
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ return "CP_MEC2";
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ return "CP_MEC2_JT";
+ case AMDGPU_UCODE_ID_CP_MES:
+ return "CP_MES";
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ return "CP_MES_DATA";
+ case AMDGPU_UCODE_ID_CP_MES1:
+ return "CP_MES_KIQ";
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ return "CP_MES_KIQ_DATA";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ return "RLC_RESTORE_LIST_CNTL";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ return "RLC_RESTORE_LIST_GPM_MEM";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ return "RLC_RESTORE_LIST_SRM_MEM";
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ return "RLC_IRAM";
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ return "RLC_DRAM";
+ case AMDGPU_UCODE_ID_RLC_G:
+ return "RLC_G";
+ case AMDGPU_UCODE_ID_RLC_P:
+ return "RLC_P";
+ case AMDGPU_UCODE_ID_RLC_V:
+ return "RLC_V";
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ return "GLOBAL_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ return "SE0_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ return "SE1_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ return "SE2_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ return "SE3_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_IMU_I:
+ return "IMU_I";
+ case AMDGPU_UCODE_ID_IMU_D:
+ return "IMU_D";
+ case AMDGPU_UCODE_ID_STORAGE:
+ return "STORAGE";
+ case AMDGPU_UCODE_ID_SMC:
+ return "SMC";
+ case AMDGPU_UCODE_ID_PPTABLE:
+ return "PPTABLE";
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ return "P2STABLE";
+ case AMDGPU_UCODE_ID_UVD:
+ return "UVD";
+ case AMDGPU_UCODE_ID_UVD1:
+ return "UVD1";
+ case AMDGPU_UCODE_ID_VCE:
+ return "VCE";
+ case AMDGPU_UCODE_ID_VCN:
+ return "VCN";
+ case AMDGPU_UCODE_ID_VCN1:
+ return "VCN1";
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ return "DMCU_ERAM";
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ return "DMCU_INTV";
+ case AMDGPU_UCODE_ID_VCN0_RAM:
+ return "VCN0_RAM";
+ case AMDGPU_UCODE_ID_VCN1_RAM:
+ return "VCN1_RAM";
+ case AMDGPU_UCODE_ID_DMCUB:
+ return "DMCUB";
+ case AMDGPU_UCODE_ID_CAP:
+ return "CAP";
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ return "VPE_CTX";
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ return "VPE_CTL";
+ case AMDGPU_UCODE_ID_VPE:
+ return "VPE";
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ return "UMSCH_MM_UCODE";
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ return "UMSCH_MM_DATA";
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ return "UMSCH_MM_CMD_BUFFER";
+ case AMDGPU_UCODE_ID_JPEG_RAM:
+ return "JPEG";
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ return "RS64_SDMA";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ return "RS64_PFP";
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ return "RS64_ME";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ return "RS64_MEC";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ return "RS64_PFP_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ return "RS64_PFP_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ return "RS64_ME_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ return "RS64_ME_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ return "RS64_MEC_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ return "RS64_MEC_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ return "RS64_MEC_P2_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ return "RS64_MEC_P3_STACK";
+ case AMDGPU_UCODE_ID_ISP:
+ return "ISP";
default:
- DRM_ERROR("Unknown firmware load type\n");
+ return "UNKNOWN UCODE";
}
+}
+
+static inline int amdgpu_ucode_is_valid(uint32_t fw_version)
+{
+ if (!fw_version)
+ return -EINVAL;
- return AMDGPU_FW_LOAD_DIRECT;
+ return 0;
}
#define FW_VERSION_ATTR(name, mode, field) \
static ssize_t show_##name(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
+ struct device_attribute *attr, char *buf) \
{ \
struct drm_device *ddev = dev_get_drvdata(dev); \
struct amdgpu_device *adev = drm_to_adev(ddev); \
\
- return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \
+ if (!buf) \
+ return amdgpu_ucode_is_valid(adev->field); \
+ \
+ return sysfs_emit(buf, "0x%08x\n", adev->field); \
} \
static DEVICE_ATTR(name, mode, show_##name, NULL)
@@ -437,15 +760,20 @@ FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
-FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
-FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
-FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_ras_ucode_version);
-FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_xgmi_ucode_version);
+FW_VERSION_ATTR(imu_fw_version, 0444, gfx.imu_fw_version);
+FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version);
+FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_context.bin_desc.fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras_context.context.bin_desc.fw_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.xgmi_context.context.bin_desc.fw_version);
FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+FW_VERSION_ATTR(dmcub_fw_version, 0444, dm.dmcub_fw_version);
+FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(pldm_fw_version, 0444, firmware.pldm_version);
static struct attribute *fw_attrs[] = {
&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
@@ -458,12 +786,30 @@ static struct attribute *fw_attrs[] = {
&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
- &dev_attr_dmcu_fw_version.attr, NULL
+ &dev_attr_dmcu_fw_version.attr, &dev_attr_dmcub_fw_version.attr,
+ &dev_attr_imu_fw_version.attr, &dev_attr_mes_fw_version.attr,
+ &dev_attr_mes_kiq_fw_version.attr, &dev_attr_pldm_fw_version.attr,
+ NULL
};
+#define to_dev_attr(x) container_of(x, struct device_attribute, attr)
+
+static umode_t amdgpu_ucode_sys_visible(struct kobject *kobj,
+ struct attribute *attr, int idx)
+{
+ struct device_attribute *dev_attr = to_dev_attr(attr);
+ struct device *dev = kobj_to_dev(kobj);
+
+ if (dev_attr->show(dev, dev_attr, NULL) == -EINVAL)
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group fw_attr_group = {
.name = "fw_version",
- .attrs = fw_attrs
+ .attrs = fw_attrs,
+ .is_visible = amdgpu_ucode_sys_visible
};
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
@@ -482,11 +828,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
{
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
-
- if (NULL == ucode->fw)
+ const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
+ const struct sdma_firmware_header_v3_0 *sdmav3_hdr = NULL;
+ const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
+ const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL;
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL;
+ u8 *ucode_addr;
+
+ if (!ucode->fw)
return 0;
ucode->mc_addr = mc_addr;
@@ -497,98 +850,240 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
header = (const struct common_firmware_header *)ucode->fw->data;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
-
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
- (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
- ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
- ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
- ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
- ucode->ucode_id != AMDGPU_UCODE_ID_CP_MES &&
- ucode->ucode_id != AMDGPU_UCODE_ID_CP_MES_DATA &&
- ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
- ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
- ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
- ucode->ucode_id != AMDGPU_UCODE_ID_RLC_IRAM &&
- ucode->ucode_id != AMDGPU_UCODE_ID_RLC_DRAM &&
- ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
- ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV &&
- ucode->ucode_id != AMDGPU_UCODE_ID_DMCUB)) {
- ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
-
- memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
- le32_to_cpu(header->ucode_array_offset_bytes)),
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1 ||
- ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2) {
- ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4;
-
- memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
- le32_to_cpu(header->ucode_array_offset_bytes)),
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
- ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) {
- ucode->ucode_size = le32_to_cpu(cp_hdr->jt_size) * 4;
-
- memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
- le32_to_cpu(header->ucode_array_offset_bytes) +
- le32_to_cpu(cp_hdr->jt_offset) * 4),
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_ERAM) {
- ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data;
+ sdmav3_hdr = (const struct sdma_firmware_header_v3_0 *)ucode->fw->data;
+ imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data;
+ vpe_hdr = (const struct vpe_firmware_header_v1_0 *)ucode->fw->data;
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ ucode->ucode_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ ucode->ucode_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdma_hdr->ctl_ucode_offset);
+ break;
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ ucode->ucode_size = le32_to_cpu(sdmav3_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdmav3_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ ucode->ucode_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes) +
+ le32_to_cpu(cp_hdr->jt_offset) * 4;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_cntl;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_gpm;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_srm;
+ break;
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ ucode->ucode_size = adev->gfx.rlc.rlc_iram_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlc_iram_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlc_dram_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ ucode->ucode_size = adev->gfx.rlc.rlcp_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlcp_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlcv_ucode;
+ break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.global_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
le32_to_cpu(dmcu_hdr->intv_size_bytes);
-
- memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
- le32_to_cpu(header->ucode_array_offset_bytes)),
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_INTV) {
- ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes);
-
- memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
- le32_to_cpu(header->ucode_array_offset_bytes) +
- le32_to_cpu(dmcu_hdr->intv_offset_bytes)),
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCUB) {
- ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes);
- memcpy(ucode->kaddr,
- (void *)((uint8_t *)ucode->fw->data +
- le32_to_cpu(header->ucode_array_offset_bytes)),
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
- ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
- memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
- ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
- memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
- ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
- memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_IRAM) {
- ucode->ucode_size = adev->gfx.rlc.rlc_iram_ucode_size_bytes;
- memcpy(ucode->kaddr, adev->gfx.rlc.rlc_iram_ucode,
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_DRAM) {
- ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes;
- memcpy(ucode->kaddr, adev->gfx.rlc.rlc_dram_ucode,
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES) {
- ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
- memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data +
- le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)),
- ucode->ucode_size);
- } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA) {
- ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
- memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data +
- le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)),
- ucode->ucode_size);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes) +
+ le32_to_cpu(dmcu_hdr->intv_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCUB:
+ ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ ucode->ucode_size = ucode->fw->size;
+ ucode_addr = (u8 *)ucode->fw->data;
+ break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ ucode->ucode_size = ucode->fw->size;
+ ucode_addr = (u8 *)ucode->fw->data;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ ucode->ucode_size = le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ ucode->ucode_size = le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes) +
+ le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ ucode->ucode_size = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ ucode->ucode_size = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(vpe_hdr->ctl_ucode_offset);
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(umsch_mm_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes);
+ break;
+ default:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ }
+ } else {
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
}
+ memcpy(ucode->kaddr, ucode_addr, ucode->ucode_size);
+
return 0;
}
@@ -600,7 +1095,7 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
uint8_t *src_addr = NULL;
uint8_t *dst_addr = NULL;
- if (NULL == ucode->fw)
+ if (!ucode->fw)
return 0;
comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
@@ -618,9 +1113,11 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
{
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
+ if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) &&
+ (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
- amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
@@ -636,8 +1133,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
{
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
- amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
+ amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
}
@@ -664,6 +1160,9 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
}
+ if (amdgpu_virt_xgmi_migrate_enabled(adev) && adev->firmware.fw_buf)
+ adev->firmware.fw_buf_mc = amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf);
+
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
@@ -672,6 +1171,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
const struct gfx_firmware_header_v1_0 *cp_hdr;
+
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
adev->firmware.fw_buf_ptr + fw_offset);
@@ -682,3 +1182,335 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
}
return 0;
}
+
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int block_type)
+{
+ if (block_type == MP0_HWIP) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "vega10";
+ case CHIP_VEGA12:
+ return "vega12";
+ default:
+ return NULL;
+ }
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso";
+ return "raven";
+ }
+ break;
+ case IP_VERSION(11, 0, 0):
+ return "navi10";
+ case IP_VERSION(11, 0, 2):
+ return "vega20";
+ case IP_VERSION(11, 0, 3):
+ return "renoir";
+ case IP_VERSION(11, 0, 4):
+ return "arcturus";
+ case IP_VERSION(11, 0, 5):
+ return "navi14";
+ case IP_VERSION(11, 0, 7):
+ return "sienna_cichlid";
+ case IP_VERSION(11, 0, 9):
+ return "navi12";
+ case IP_VERSION(11, 0, 11):
+ return "navy_flounder";
+ case IP_VERSION(11, 0, 12):
+ return "dimgrey_cavefish";
+ case IP_VERSION(11, 0, 13):
+ return "beige_goby";
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ return "vangogh";
+ case IP_VERSION(12, 0, 1):
+ return "green_sardine";
+ case IP_VERSION(13, 0, 2):
+ return "aldebaran";
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ return "yellow_carp";
+ }
+ } else if (block_type == MP1_HWIP) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_ARCTURUS)
+ return "arcturus_smc";
+ return NULL;
+ case IP_VERSION(11, 0, 0):
+ return "navi10_smc";
+ case IP_VERSION(11, 0, 5):
+ return "navi14_smc";
+ case IP_VERSION(11, 0, 9):
+ return "navi12_smc";
+ case IP_VERSION(11, 0, 7):
+ return "sienna_cichlid_smc";
+ case IP_VERSION(11, 0, 11):
+ return "navy_flounder_smc";
+ case IP_VERSION(11, 0, 12):
+ return "dimgrey_cavefish_smc";
+ case IP_VERSION(11, 0, 13):
+ return "beige_goby_smc";
+ case IP_VERSION(13, 0, 2):
+ return "aldebaran_smc";
+ }
+ } else if (block_type == SDMA0_HWIP) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ return "vega10_sdma";
+ case IP_VERSION(4, 0, 1):
+ return "vega12_sdma";
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2_sdma";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso_sdma";
+ return "raven_sdma";
+ case IP_VERSION(4, 1, 2):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir_sdma";
+ return "green_sardine_sdma";
+ case IP_VERSION(4, 2, 0):
+ return "vega20_sdma";
+ case IP_VERSION(4, 2, 2):
+ return "arcturus_sdma";
+ case IP_VERSION(4, 4, 0):
+ return "aldebaran_sdma";
+ case IP_VERSION(5, 0, 0):
+ return "navi10_sdma";
+ case IP_VERSION(5, 0, 1):
+ return "cyan_skillfish2_sdma";
+ case IP_VERSION(5, 0, 2):
+ return "navi14_sdma";
+ case IP_VERSION(5, 0, 5):
+ return "navi12_sdma";
+ case IP_VERSION(5, 2, 0):
+ return "sienna_cichlid_sdma";
+ case IP_VERSION(5, 2, 2):
+ return "navy_flounder_sdma";
+ case IP_VERSION(5, 2, 4):
+ return "dimgrey_cavefish_sdma";
+ case IP_VERSION(5, 2, 5):
+ return "beige_goby_sdma";
+ case IP_VERSION(5, 2, 3):
+ return "yellow_carp_sdma";
+ case IP_VERSION(5, 2, 1):
+ return "vangogh_sdma";
+ }
+ } else if (block_type == UVD_HWIP) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2_vcn";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso_vcn";
+ return "raven_vcn";
+ case IP_VERSION(2, 5, 0):
+ return "arcturus_vcn";
+ case IP_VERSION(2, 2, 0):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir_vcn";
+ return "green_sardine_vcn";
+ case IP_VERSION(2, 6, 0):
+ return "aldebaran_vcn";
+ case IP_VERSION(2, 0, 0):
+ return "navi10_vcn";
+ case IP_VERSION(2, 0, 2):
+ if (adev->asic_type == CHIP_NAVI12)
+ return "navi12_vcn";
+ return "navi14_vcn";
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 64):
+ case IP_VERSION(3, 0, 192):
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0))
+ return "sienna_cichlid_vcn";
+ return "navy_flounder_vcn";
+ case IP_VERSION(3, 0, 2):
+ return "vangogh_vcn";
+ case IP_VERSION(3, 0, 16):
+ return "dimgrey_cavefish_vcn";
+ case IP_VERSION(3, 0, 33):
+ return "beige_goby_vcn";
+ case IP_VERSION(3, 1, 1):
+ return "yellow_carp_vcn";
+ }
+ } else if (block_type == GC_HWIP) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ return "vega10";
+ case IP_VERSION(9, 2, 1):
+ return "vega12";
+ case IP_VERSION(9, 4, 0):
+ return "vega20";
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso";
+ return "raven";
+ case IP_VERSION(9, 4, 1):
+ return "arcturus";
+ case IP_VERSION(9, 3, 0):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir";
+ return "green_sardine";
+ case IP_VERSION(9, 4, 2):
+ return "aldebaran";
+ case IP_VERSION(10, 1, 10):
+ return "navi10";
+ case IP_VERSION(10, 1, 1):
+ return "navi14";
+ case IP_VERSION(10, 1, 2):
+ return "navi12";
+ case IP_VERSION(10, 3, 0):
+ return "sienna_cichlid";
+ case IP_VERSION(10, 3, 2):
+ return "navy_flounder";
+ case IP_VERSION(10, 3, 1):
+ return "vangogh";
+ case IP_VERSION(10, 3, 4):
+ return "dimgrey_cavefish";
+ case IP_VERSION(10, 3, 5):
+ return "beige_goby";
+ case IP_VERSION(10, 3, 3):
+ return "yellow_carp";
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ return "cyan_skillfish2";
+ }
+ }
+ return NULL;
+}
+
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) {
+ if (adev->pdev->device == kicker_device_list[i].device &&
+ adev->pdev->revision == kicker_device_list[i].revision)
+ return true;
+ }
+
+ return false;
+}
+
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len)
+{
+ int maj, min, rev;
+ char *ip_name;
+ const char *legacy;
+ uint32_t version = amdgpu_ip_version(adev, block_type, 0);
+
+ legacy = amdgpu_ucode_legacy_naming(adev, block_type);
+ if (legacy) {
+ snprintf(ucode_prefix, len, "%s", legacy);
+ return;
+ }
+
+ switch (block_type) {
+ case GC_HWIP:
+ ip_name = "gc";
+ break;
+ case SDMA0_HWIP:
+ ip_name = "sdma";
+ break;
+ case MP0_HWIP:
+ ip_name = "psp";
+ break;
+ case MP1_HWIP:
+ ip_name = "smu";
+ break;
+ case UVD_HWIP:
+ ip_name = "vcn";
+ break;
+ case VPE_HWIP:
+ ip_name = "vpe";
+ break;
+ case ISP_HWIP:
+ ip_name = "isp";
+ break;
+ default:
+ BUG();
+ }
+
+ maj = IP_VERSION_MAJ(version);
+ min = IP_VERSION_MIN(version);
+ rev = IP_VERSION_REV(version);
+
+ snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);
+}
+
+/*
+ * amdgpu_ucode_request - Fetch and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @required: whether the firmware is required
+ * @fmt: firmware name format string
+ * @...: variable arguments
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
+ enum amdgpu_ucode_required required, const char *fmt, ...)
+{
+ char fname[AMDGPU_UCODE_NAME_MAX];
+ va_list ap;
+ int r;
+
+ va_start(ap, fmt);
+ r = vsnprintf(fname, sizeof(fname), fmt, ap);
+ va_end(ap);
+ if (r == sizeof(fname)) {
+ dev_warn(adev->dev, "amdgpu firmware name buffer overflow\n");
+ return -EOVERFLOW;
+ }
+
+ if (required == AMDGPU_UCODE_REQUIRED)
+ r = request_firmware(fw, fname, adev->dev);
+ else {
+ r = firmware_request_nowarn(fw, fname, adev->dev);
+ if (r)
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname);
+ }
+ if (r)
+ return -ENODEV;
+
+ r = amdgpu_ucode_validate(*fw);
+ if (r)
+ /*
+ * The amdgpu_ucode_request() should be paired with amdgpu_ucode_release()
+ * regardless of success/failure, and the amdgpu_ucode_release() takes care of
+ * firmware release and need to avoid redundant release FW operation here.
+ */
+ dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname);
+
+ return r;
+}
+
+/*
+ * amdgpu_ucode_release - Release firmware microcode
+ *
+ * @fw: pointer to firmware to release
+ */
+void amdgpu_ucode_release(const struct firmware **fw)
+{
+ release_firmware(*fw);
+ *fw = NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 46449e70348b..6349aad6da35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -25,6 +25,8 @@
#include "amdgpu_socbb.h"
+#define RS64_FW_UC_START_ADDR_LO 0x3000
+
struct common_firmware_header {
uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
uint32_t header_size_bytes; /* size of just the header in bytes */
@@ -71,60 +73,88 @@ struct smc_firmware_header_v2_1 {
uint32_t pptable_entry_offset;
};
+struct psp_fw_legacy_bin_desc {
+ uint32_t fw_version;
+ uint32_t offset_bytes;
+ uint32_t size_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct psp_firmware_header_v1_0 {
struct common_firmware_header header;
- uint32_t ucode_feature_version;
- uint32_t sos_offset_bytes;
- uint32_t sos_size_bytes;
+ struct psp_fw_legacy_bin_desc sos;
};
/* version_major=1, version_minor=1 */
struct psp_firmware_header_v1_1 {
struct psp_firmware_header_v1_0 v1_0;
- uint32_t toc_header_version;
- uint32_t toc_offset_bytes;
- uint32_t toc_size_bytes;
- uint32_t kdb_header_version;
- uint32_t kdb_offset_bytes;
- uint32_t kdb_size_bytes;
+ struct psp_fw_legacy_bin_desc toc;
+ struct psp_fw_legacy_bin_desc kdb;
};
/* version_major=1, version_minor=2 */
struct psp_firmware_header_v1_2 {
struct psp_firmware_header_v1_0 v1_0;
- uint32_t reserve[3];
- uint32_t kdb_header_version;
- uint32_t kdb_offset_bytes;
- uint32_t kdb_size_bytes;
+ struct psp_fw_legacy_bin_desc res;
+ struct psp_fw_legacy_bin_desc kdb;
};
/* version_major=1, version_minor=3 */
struct psp_firmware_header_v1_3 {
struct psp_firmware_header_v1_1 v1_1;
- uint32_t spl_header_version;
- uint32_t spl_offset_bytes;
- uint32_t spl_size_bytes;
+ struct psp_fw_legacy_bin_desc spl;
+ struct psp_fw_legacy_bin_desc rl;
+ struct psp_fw_legacy_bin_desc sys_drv_aux;
+ struct psp_fw_legacy_bin_desc sos_aux;
+};
+
+struct psp_fw_bin_desc {
+ uint32_t fw_type;
+ uint32_t fw_version;
+ uint32_t offset_bytes;
+ uint32_t size_bytes;
+};
+
+enum psp_fw_type {
+ PSP_FW_TYPE_UNKOWN,
+ PSP_FW_TYPE_PSP_SOS,
+ PSP_FW_TYPE_PSP_SYS_DRV,
+ PSP_FW_TYPE_PSP_KDB,
+ PSP_FW_TYPE_PSP_TOC,
+ PSP_FW_TYPE_PSP_SPL,
+ PSP_FW_TYPE_PSP_RL,
+ PSP_FW_TYPE_PSP_SOC_DRV,
+ PSP_FW_TYPE_PSP_INTF_DRV,
+ PSP_FW_TYPE_PSP_DBG_DRV,
+ PSP_FW_TYPE_PSP_RAS_DRV,
+ PSP_FW_TYPE_PSP_IPKEYMGR_DRV,
+ PSP_FW_TYPE_PSP_SPDM_DRV,
+ PSP_FW_TYPE_MAX_INDEX,
+};
+
+/* version_major=2, version_minor=0 */
+struct psp_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t psp_fw_bin_count;
+ struct psp_fw_bin_desc psp_fw_bin[];
+};
+
+/* version_major=2, version_minor=1 */
+struct psp_firmware_header_v2_1 {
+ struct common_firmware_header header;
+ uint32_t psp_fw_bin_count;
+ uint32_t psp_aux_fw_bin_index;
+ struct psp_fw_bin_desc psp_fw_bin[];
};
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
- uint32_t ta_xgmi_ucode_version;
- uint32_t ta_xgmi_offset_bytes;
- uint32_t ta_xgmi_size_bytes;
- uint32_t ta_ras_ucode_version;
- uint32_t ta_ras_offset_bytes;
- uint32_t ta_ras_size_bytes;
- uint32_t ta_hdcp_ucode_version;
- uint32_t ta_hdcp_offset_bytes;
- uint32_t ta_hdcp_size_bytes;
- uint32_t ta_dtm_ucode_version;
- uint32_t ta_dtm_offset_bytes;
- uint32_t ta_dtm_size_bytes;
- uint32_t ta_securedisplay_ucode_version;
- uint32_t ta_securedisplay_offset_bytes;
- uint32_t ta_securedisplay_size_bytes;
+ struct psp_fw_legacy_bin_desc xgmi;
+ struct psp_fw_legacy_bin_desc ras;
+ struct psp_fw_legacy_bin_desc hdcp;
+ struct psp_fw_legacy_bin_desc dtm;
+ struct psp_fw_legacy_bin_desc securedisplay;
};
enum ta_fw_type {
@@ -136,20 +166,15 @@ enum ta_fw_type {
TA_FW_TYPE_PSP_DTM,
TA_FW_TYPE_PSP_RAP,
TA_FW_TYPE_PSP_SECUREDISPLAY,
-};
-
-struct ta_fw_bin_desc {
- uint32_t fw_type;
- uint32_t fw_version;
- uint32_t offset_bytes;
- uint32_t size_bytes;
+ TA_FW_TYPE_PSP_XGMI_AUX,
+ TA_FW_TYPE_MAX_INDEX,
};
/* version_major=2, version_minor=0 */
struct ta_firmware_header_v2_0 {
struct common_firmware_header header;
uint32_t ta_fw_bin_count;
- struct ta_fw_bin_desc ta_fw_bin[];
+ struct psp_fw_bin_desc ta_fw_bin[];
};
/* version_major=1, version_minor=0 */
@@ -160,6 +185,18 @@ struct gfx_firmware_header_v1_0 {
uint32_t jt_size; /* size of jt */
};
+/* version_major=2, version_minor=0 */
+struct gfx_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_size_bytes;
+ uint32_t ucode_offset_bytes;
+ uint32_t data_size_bytes;
+ uint32_t data_offset_bytes;
+ uint32_t ucode_start_addr_lo;
+ uint32_t ucode_start_addr_hi;
+};
+
/* version_major=1, version_minor=0 */
struct mes_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -226,7 +263,7 @@ struct rlc_firmware_header_v2_1 {
uint32_t save_restore_list_srm_offset_bytes;
};
-/* version_major=2, version_minor=1 */
+/* version_major=2, version_minor=2 */
struct rlc_firmware_header_v2_2 {
struct rlc_firmware_header_v2_1 v2_1;
uint32_t rlc_iram_ucode_size_bytes;
@@ -235,6 +272,34 @@ struct rlc_firmware_header_v2_2 {
uint32_t rlc_dram_ucode_offset_bytes;
};
+/* version_major=2, version_minor=3 */
+struct rlc_firmware_header_v2_3 {
+ struct rlc_firmware_header_v2_2 v2_2;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcp_ucode_size_bytes;
+ uint32_t rlcp_ucode_offset_bytes;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
+ uint32_t rlcv_ucode_size_bytes;
+ uint32_t rlcv_ucode_offset_bytes;
+};
+
+/* version_major=2, version_minor=4 */
+struct rlc_firmware_header_v2_4 {
+ struct rlc_firmware_header_v2_3 v2_3;
+ uint32_t global_tap_delays_ucode_size_bytes;
+ uint32_t global_tap_delays_ucode_offset_bytes;
+ uint32_t se0_tap_delays_ucode_size_bytes;
+ uint32_t se0_tap_delays_ucode_offset_bytes;
+ uint32_t se1_tap_delays_ucode_size_bytes;
+ uint32_t se1_tap_delays_ucode_offset_bytes;
+ uint32_t se2_tap_delays_ucode_size_bytes;
+ uint32_t se2_tap_delays_ucode_offset_bytes;
+ uint32_t se3_tap_delays_ucode_size_bytes;
+ uint32_t se3_tap_delays_ucode_offset_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -250,6 +315,57 @@ struct sdma_firmware_header_v1_1 {
uint32_t digest_size;
};
+/* version_major=2, version_minor=0 */
+struct sdma_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
+ uint32_t ctx_jt_offset; /* context thread jt location */
+ uint32_t ctx_jt_size; /* context thread size of jt */
+ uint32_t ctl_ucode_offset;
+ uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
+ uint32_t ctl_jt_offset; /* control thread jt location */
+ uint32_t ctl_jt_size; /* control thread size of jt */
+};
+
+/* version_major=1, version_minor=0 */
+struct vpe_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
+ uint32_t ctx_jt_offset; /* context thread jt location */
+ uint32_t ctx_jt_size; /* context thread size of jt */
+ uint32_t ctl_ucode_offset;
+ uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
+ uint32_t ctl_jt_offset; /* control thread jt location */
+ uint32_t ctl_jt_size; /* control thread size of jt */
+};
+
+/* version_major=1, version_minor=0 */
+struct umsch_mm_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t umsch_mm_ucode_version;
+ uint32_t umsch_mm_ucode_size_bytes;
+ uint32_t umsch_mm_ucode_offset_bytes;
+ uint32_t umsch_mm_ucode_data_version;
+ uint32_t umsch_mm_ucode_data_size_bytes;
+ uint32_t umsch_mm_ucode_data_offset_bytes;
+ uint32_t umsch_mm_irq_start_addr_lo;
+ uint32_t umsch_mm_irq_start_addr_hi;
+ uint32_t umsch_mm_uc_start_addr_lo;
+ uint32_t umsch_mm_uc_start_addr_hi;
+ uint32_t umsch_mm_data_start_addr_lo;
+ uint32_t umsch_mm_data_start_addr_hi;
+};
+
+/* version_major=3, version_minor=0 */
+struct sdma_firmware_header_v3_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_offset_bytes;
+ uint32_t ucode_size_bytes;
+};
+
/* gpu info payload */
struct gpu_info_firmware_v1_0 {
uint32_t gc_num_se;
@@ -303,6 +419,15 @@ struct dmcub_firmware_header_v1_0 {
uint32_t bss_data_bytes; /* size of bss/data region, in bytes */
};
+/* version_major=1, version_minor=0 */
+struct imu_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t imu_iram_ucode_size_bytes;
+ uint32_t imu_iram_ucode_offset_bytes;
+ uint32_t imu_dram_ucode_size_bytes;
+ uint32_t imu_dram_ucode_offset_bytes;
+};
+
/* header is fixed size */
union amdgpu_firmware_header {
struct common_firmware_header common;
@@ -312,27 +437,37 @@ union amdgpu_firmware_header {
struct psp_firmware_header_v1_0 psp;
struct psp_firmware_header_v1_1 psp_v1_1;
struct psp_firmware_header_v1_3 psp_v1_3;
+ struct psp_firmware_header_v2_0 psp_v2_0;
+ struct psp_firmware_header_v2_0 psp_v2_1;
struct ta_firmware_header_v1_0 ta;
struct ta_firmware_header_v2_0 ta_v2_0;
struct gfx_firmware_header_v1_0 gfx;
+ struct gfx_firmware_header_v2_0 gfx_v2_0;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
struct rlc_firmware_header_v2_1 rlc_v2_1;
+ struct rlc_firmware_header_v2_2 rlc_v2_2;
+ struct rlc_firmware_header_v2_3 rlc_v2_3;
+ struct rlc_firmware_header_v2_4 rlc_v2_4;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
+ struct sdma_firmware_header_v2_0 sdma_v2_0;
+ struct sdma_firmware_header_v3_0 sdma_v3_0;
struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
struct dmcub_firmware_header_v1_0 dmcub;
+ struct imu_firmware_header_v1_0 imu;
uint8_t raw[0x100];
};
-#define UCODE_MAX_TA_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct ta_fw_bin_desc))
+#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2)
/*
* fw loading support
*/
enum AMDGPU_UCODE_ID {
- AMDGPU_UCODE_ID_SDMA0 = 0,
+ AMDGPU_UCODE_ID_CAP = 0,
+ AMDGPU_UCODE_ID_SDMA0,
AMDGPU_UCODE_ID_SDMA1,
AMDGPU_UCODE_ID_SDMA2,
AMDGPU_UCODE_ID_SDMA3,
@@ -340,23 +475,49 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_SDMA5,
AMDGPU_UCODE_ID_SDMA6,
AMDGPU_UCODE_ID_SDMA7,
+ AMDGPU_UCODE_ID_SDMA_UCODE_TH0,
+ AMDGPU_UCODE_ID_SDMA_UCODE_TH1,
+ AMDGPU_UCODE_ID_SDMA_RS64,
AMDGPU_UCODE_ID_CP_CE,
AMDGPU_UCODE_ID_CP_PFP,
AMDGPU_UCODE_ID_CP_ME,
+ AMDGPU_UCODE_ID_CP_RS64_PFP,
+ AMDGPU_UCODE_ID_CP_RS64_ME,
+ AMDGPU_UCODE_ID_CP_RS64_MEC,
+ AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK,
AMDGPU_UCODE_ID_CP_MEC1,
AMDGPU_UCODE_ID_CP_MEC1_JT,
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_CP_MES,
AMDGPU_UCODE_ID_CP_MES_DATA,
+ AMDGPU_UCODE_ID_CP_MES1,
+ AMDGPU_UCODE_ID_CP_MES1_DATA,
+ AMDGPU_UCODE_ID_IMU_I,
+ AMDGPU_UCODE_ID_IMU_D,
+ AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE0_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE1_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE2_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE3_TAP_DELAYS,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
AMDGPU_UCODE_ID_RLC_IRAM,
AMDGPU_UCODE_ID_RLC_DRAM,
+ AMDGPU_UCODE_ID_RLC_P,
+ AMDGPU_UCODE_ID_RLC_V,
AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
+ AMDGPU_UCODE_ID_PPTABLE,
AMDGPU_UCODE_ID_UVD,
AMDGPU_UCODE_ID_UVD1,
AMDGPU_UCODE_ID_VCE,
@@ -367,6 +528,15 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_VCN0_RAM,
AMDGPU_UCODE_ID_VCN1_RAM,
AMDGPU_UCODE_ID_DMCUB,
+ AMDGPU_UCODE_ID_VPE_CTX,
+ AMDGPU_UCODE_ID_VPE_CTL,
+ AMDGPU_UCODE_ID_VPE,
+ AMDGPU_UCODE_ID_UMSCH_MM_UCODE,
+ AMDGPU_UCODE_ID_UMSCH_MM_DATA,
+ AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+ AMDGPU_UCODE_ID_P2S_TABLE,
+ AMDGPU_UCODE_ID_JPEG_RAM,
+ AMDGPU_UCODE_ID_ISP,
AMDGPU_UCODE_ID_MAXIMUM,
};
@@ -379,11 +549,16 @@ enum AMDGPU_UCODE_STATUS {
enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_DIRECT = 0,
- AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_PSP,
+ AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
};
+enum amdgpu_ucode_required {
+ AMDGPU_UCODE_OPTIONAL,
+ AMDGPU_UCODE_REQUIRED,
+};
+
/* conform to smu_ucode_xfer_cz.h */
#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
@@ -427,16 +602,26 @@ struct amdgpu_firmware {
void *fw_buf_ptr;
uint64_t fw_buf_mc;
+ uint32_t pldm_version;
+};
+
+struct kicker_device{
+ unsigned short device;
+ u8 revision;
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
-int amdgpu_ucode_validate(const struct firmware *fw);
+__printf(4, 5)
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
+ enum amdgpu_ucode_required required, const char *fmt, ...);
+void amdgpu_ucode_release(const struct firmware **fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
@@ -449,4 +634,9 @@ void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
enum amdgpu_firmware_load_type
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
+const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
+
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len);
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index a2975c8092a9..3f0b0e9af4f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -21,122 +21,363 @@
*
*/
-#include "amdgpu_ras.h"
+#include <linux/sort.h>
+#include "amdgpu.h"
+#include "umc_v6_7.h"
+#include "amdgpu_ras_mgr.h"
+#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
+#define MAX_UMC_HASH_STRING_SIZE 256
+
+static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
{
- int r;
- struct ras_fs_if fs_info = {
- .sysfs_name = "umc_err_count",
- };
- struct ras_ih_if ih_info = {
- .cb = amdgpu_umc_process_ras_data_cb,
- };
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(6, 7, 0):
+ umc_v6_7_convert_error_address(adev,
+ err_data, err_addr, ch_inst, umc_inst);
+ break;
+ default:
+ dev_warn(adev->dev,
+ "UMC address to Physical address translation is not supported\n");
+ return AMDGPU_RAS_FAIL;
+ }
+
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
+{
+ struct ras_err_data err_data;
+ int ret;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
- if (!adev->umc.ras_if) {
- adev->umc.ras_if =
- kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->umc.ras_if)
- return -ENOMEM;
- adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC;
- adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->umc.ras_if->sub_block_index = 0;
- strcpy(adev->umc.ras_if->name, "umc");
+ err_data.err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error record in MCA notifier!\n");
+ ret = AMDGPU_RAS_FAIL;
+ goto out_fini_err_data;
}
- ih_info.head = fs_info.head = *adev->umc.ras_if;
- r = amdgpu_ras_late_init(adev, adev->umc.ras_if,
- &fs_info, &ih_info);
- if (r)
- goto free;
+ err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;
- if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) {
- r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
- if (r)
- goto late_fini;
- } else {
- r = 0;
- goto free;
+ /*
+ * Translate UMC channel address to Physical address
+ */
+ ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr,
+ ch_inst, umc_inst);
+ if (ret)
+ goto out_free_err_addr;
+
+ if (amdgpu_bad_page_threshold != 0) {
+ amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+ err_data.err_addr_cnt, false);
+ amdgpu_ras_save_bad_pages(adev, NULL);
}
- /* ras init of specific umc version */
- if (adev->umc.funcs && adev->umc.funcs->err_cnt_init)
- adev->umc.funcs->err_cnt_init(adev);
+out_free_err_addr:
+ kfree(err_data.err_addr);
- return 0;
+out_fini_err_data:
+ amdgpu_ras_error_data_fini(&err_data);
-late_fini:
- amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info);
-free:
- kfree(adev->umc.ras_if);
- adev->umc.ras_if = NULL;
- return r;
+ return ret;
}
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status)
{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
- adev->umc.ras_if) {
- struct ras_common_if *ras_if = adev->umc.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- .cb = amdgpu_umc_process_ras_data_cb,
- };
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = &con->eeprom_control;
+ unsigned int error_query_mode;
+ int ret = 0;
+ unsigned long err_count;
+
+ amdgpu_ras_get_error_query_mode(adev, &error_query_mode);
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
+
+ mutex_lock(&con->page_retirement_lock);
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
+ if (ret == -EOPNOTSUPP &&
+ error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev,
+ ras_error_status);
+
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len =
+ adev->umc.max_ras_err_cnt_per_query;
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev,
+ ras_error_status);
+ }
+ } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
+ (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev,
+ ras_error_status);
+
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len =
+ adev->umc.max_ras_err_cnt_per_query;
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.ras->ecc_info_query_ras_error_address(adev,
+ ras_error_status);
+ }
+ }
+ } else {
+ if (!amdgpu_ras_eeprom_update_record_num(control)) {
+ err_data->err_addr_cnt = err_data->de_count =
+ control->ras_num_recs - control->ras_num_recs_old;
+ amdgpu_ras_eeprom_read_idx(control, err_data->err_addr,
+ control->ras_num_recs_old, err_data->de_count);
+ }
}
+
+ /* only uncorrectable error needs gpu reset */
+ if (err_data->ue_count || err_data->de_count) {
+ err_count = err_data->ue_count + err_data->de_count;
+ if ((amdgpu_bad_page_threshold != 0) &&
+ err_data->err_addr_cnt) {
+ amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
+ err_data->err_addr_cnt, amdgpu_ras_smu_eeprom_supported(adev));
+ amdgpu_ras_save_bad_pages(adev, &err_count);
+
+ amdgpu_dpm_send_hbm_bad_pages_num(adev,
+ con->eeprom_control.ras_num_bad_pages);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
+ }
+ }
+
+ kfree(err_data->err_addr);
+ err_data->err_addr = NULL;
+
+ mutex_unlock(&con->page_retirement_lock);
}
-int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
void *ras_error_status,
- struct amdgpu_iv_entry *entry)
+ struct amdgpu_iv_entry *entry,
+ uint32_t reset)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->umc.funcs &&
- adev->umc.funcs->query_ras_error_count)
- adev->umc.funcs->query_ras_error_count(adev, ras_error_status);
-
- if (adev->umc.funcs &&
- adev->umc.funcs->query_ras_error_address &&
- adev->umc.max_ras_err_cnt_per_query) {
- err_data->err_addr =
- kcalloc(adev->umc.max_ras_err_cnt_per_query,
- sizeof(struct eeprom_table_record), GFP_KERNEL);
+ amdgpu_umc_handle_bad_pages(adev, ras_error_status);
+
+ if ((err_data->ue_count || err_data->de_count) &&
+ (reset || amdgpu_ras_is_rma(adev))) {
+ con->gpu_reset_flags |= reset;
+ amdgpu_ras_reset_gpu(adev);
+ }
- /* still call query_ras_error_address to clear error status
- * even NOMEM error is encountered
- */
- if(!err_data->err_addr)
- dev_warn(adev->dev, "Failed to alloc memory for "
- "umc error address record!\n");
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ int ret = AMDGPU_RAS_SUCCESS;
- /* umc query_ras_error_address is also responsible for clearing
- * error status
- */
- adev->umc.funcs->query_ras_error_address(adev, ras_error_status);
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
+ if (reset) {
+ /* MCA poison handler is only responsible for GPU reset,
+ * let MCA notifier do page retirement.
+ */
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev);
+ }
+ return ret;
}
- /* only uncorrectable error needs gpu reset */
- if (err_data->ue_count) {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
- "detected in UMC block\n",
- err_data->ue_count);
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
+ struct ras_err_data err_data;
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ };
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
- if ((amdgpu_bad_page_threshold != 0) &&
- err_data->err_addr_cnt) {
- amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
- err_data->err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev);
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+ if (ret == AMDGPU_RAS_SUCCESS && obj) {
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ obj->err_data.de_count += err_data.de_count;
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+ } else if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info = {0};
+
+ ih_info.block = block;
+ ih_info.pasid = pasid;
+ ih_info.reset = reset;
+ ih_info.pasid_fn = pasid_fn;
+ ih_info.data = data;
+ amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
+ } else {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret;
+
+ ret = amdgpu_ras_put_poison_req(adev,
+ block, pasid, pasid_fn, data, reset);
+ if (!ret) {
+ atomic_inc(&con->page_retirement_req_cnt);
+ atomic_inc(&con->poison_consumption_count);
+ wake_up(&con->page_retirement_wq);
+ }
}
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev, block);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV!\n");
+ }
- amdgpu_ras_reset_gpu(adev);
+ return ret;
+}
+
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset)
+{
+ return amdgpu_umc_pasid_poison_handler(adev,
+ block, 0, NULL, NULL, reset);
+}
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry)
+{
+ return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry,
+ AMDGPU_RAS_GPU_RESET_MODE1_RESET);
+}
+
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_umc_ras *ras;
+
+ if (!adev->umc.ras)
+ return 0;
+
+ ras = adev->umc.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register umc ras block!\n");
+ return err;
}
- kfree(err_data->err_addr);
- return AMDGPU_RAS_SUCCESS;
+ strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->umc.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
+
+ return 0;
+}
+
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ /* ras init of specific umc version */
+ if (adev->umc.ras &&
+ adev->umc.ras->err_cnt_init)
+ adev->umc.ras->err_cnt_init(adev);
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
}
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
@@ -156,3 +397,231 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
+
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+ uint64_t err_addr,
+ uint64_t retired_page,
+ uint32_t channel_index,
+ uint32_t umc_inst)
+{
+ struct eeprom_table_record *err_rec;
+
+ if (!err_data ||
+ !err_data->err_addr ||
+ (err_data->err_addr_cnt >= err_data->err_addr_len))
+ return -EINVAL;
+
+ err_rec = &err_data->err_addr[err_data->err_addr_cnt];
+
+ err_rec->address = err_addr;
+ /* page frame address is saved */
+ err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ err_rec->ts = (uint64_t)ktime_get_real_seconds();
+ err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+ err_rec->cu = 0;
+ err_rec->mem_channel = channel_index;
+ err_rec->mcumc_id = umc_inst;
+
+ err_data->err_addr_cnt++;
+
+ return 0;
+}
+
+static int amdgpu_umc_loop_all_aid(struct amdgpu_device *adev, umc_func func,
+ void *data)
+{
+ uint32_t umc_node_inst;
+ uint32_t node_inst;
+ uint32_t umc_inst;
+ uint32_t ch_inst;
+ int ret;
+
+ /*
+ * This loop is done based on the following -
+ * umc.active mask = mask of active umc instances across all nodes
+ * umc.umc_inst_num = maximum number of umc instancess per node
+ * umc.node_inst_num = maximum number of node instances
+ * Channel instances are not assumed to be harvested.
+ */
+ dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d",
+ adev->umc.active_mask, adev->umc.umc_inst_num);
+ for_each_set_bit(umc_node_inst, &(adev->umc.active_mask),
+ adev->umc.node_inst_num * adev->umc.umc_inst_num) {
+ node_inst = umc_node_inst / adev->umc.umc_inst_num;
+ umc_inst = umc_node_inst % adev->umc.umc_inst_num;
+ LOOP_UMC_CH_INST(ch_inst) {
+ dev_dbg(adev->dev,
+ "node_inst :%d umc_inst: %d ch_inst: %d",
+ node_inst, umc_inst, ch_inst);
+ ret = func(adev, node_inst, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev,
+ "Node %d umc %d ch %d func returns %d\n",
+ node_inst, umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
+ umc_func func, void *data)
+{
+ uint32_t node_inst = 0;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ int ret = 0;
+
+ if (adev->aid_mask)
+ return amdgpu_umc_loop_all_aid(adev, func, data);
+
+ if (adev->umc.node_inst_num) {
+ LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+ ret = func(adev, node_inst, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n",
+ node_inst, umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ } else {
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ ret = func(adev, 0, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev, "Umc %d ch %d func returns %d\n",
+ umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr)
+{
+ if (adev->umc.ras->update_ecc_status)
+ return adev->umc.ras->update_ecc_status(adev,
+ status, ipid, addr);
+ return 0;
+}
+
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_ecc_log_info *ecc_log;
+ int ret;
+
+ ecc_log = &con->umc_ecc_log;
+
+ mutex_lock(&ecc_log->lock);
+ ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err);
+ if (!ret)
+ radix_tree_tag_set(ecc_tree,
+ ecc_err->pa_pfn, UMC_ECC_NEW_DETECTED_TAG);
+ mutex_unlock(&ecc_log->lock);
+
+ return ret;
+}
+
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr)
+{
+ struct ta_ras_query_address_output addr_out;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ addr_out.pa.pa = pa_addr;
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL,
+ &addr_out, false);
+ else
+ return -EINVAL;
+}
+
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len)
+{
+ int i, ret;
+ struct ras_err_data err_data;
+
+ err_data.err_addr = kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n");
+ return 0;
+ }
+
+ ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ if (i >= len)
+ goto out;
+
+ pfns[i] = err_data.err_addr[i].retired_page;
+ }
+ ret = i;
+ adev->umc.err_addr_cnt = err_data.err_addr_cnt;
+
+out:
+ kfree(err_data.err_addr);
+ return ret;
+}
+
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr)
+{
+ struct ta_ras_query_address_input addr_in;
+ int ret;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = ch;
+ addr_in.ma.umc_inst = umc;
+ addr_in.ma.node_inst = node;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+ addr_out, dump_addr);
+ if (ret)
+ return ret;
+ } else {
+ return 0;
+ }
+
+ return 0;
+}
+
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps)
+{
+ struct ta_ras_query_address_input addr_in;
+ struct ta_ras_query_address_output addr_out;
+ int ret;
+
+ /* nps: the pa belongs to */
+ addr_in.pa.pa = pa | ((uint64_t)nps << 58);
+ addr_in.addr_type = TA_RAS_PA_TO_MCA;
+ ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out);
+ if (ret) {
+ dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx",
+ pa);
+
+ return ret;
+ }
+
+ *mca = addr_out.ma.err_addr;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 183814493658..28dff750c47e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -20,12 +20,23 @@
*/
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
-
+#include "amdgpu_ras.h"
+#include "amdgpu_mca.h"
+/*
+ * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
+ * is the index of 4KB block
+ */
+#define ADDR_OF_4KB_BLOCK(addr) (((addr) & ~0xffULL) << 4)
/*
* (addr / 256) * 8192, the higher 26 bits in ErrorAddr
* is the index of 8KB block
*/
#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+/*
+ * (addr / 256) * 32768, the higher 26 bits in ErrorAddr
+ * is the index of 8KB block
+ */
+#define ADDR_OF_32KB_BLOCK(addr) (((addr) & ~0xffULL) << 7)
/* channel index is the index of 256B block */
#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
/* offset in 256B block */
@@ -35,13 +46,78 @@
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
-struct amdgpu_umc_funcs {
+#define LOOP_UMC_NODE_INST(node_inst) \
+ for_each_set_bit((node_inst), &(adev->umc.active_mask), adev->umc.node_inst_num)
+
+#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
+ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
+
+/* Page retirement tag */
+#define UMC_ECC_NEW_DETECTED_TAG 0x1
+/*
+ * a flag to indicate v2 of channel index stored in eeprom
+ *
+ * v1 (legacy way): store channel index within a umc instance in eeprom
+ * range in UMC v12: 0 ~ 7
+ * v2: store global channel index in eeprom
+ * range in UMC v12: 0 ~ 127
+ *
+ * NOTE: it's better to store it in eeprom_table_record.mem_channel,
+ * but there is only 8 bits in mem_channel, and the channel number may
+ * increase in the future, we decide to save it in
+ * eeprom_table_record.retired_page. retired_page is useless in v2,
+ * we depend on eeprom_table_record.address instead of retired_page in v2.
+ * Only 48 bits are saved on eeprom, use bit 47 here.
+ */
+#define UMC_CHANNEL_IDX_V2 BIT_ULL(47)
+
+/*
+ * save nps value to eeprom_table_record.retired_page[47:40],
+ * the channel index flag above will be retired.
+ */
+#define UMC_NPS_SHIFT 40
+#define UMC_NPS_MASK 0xffULL
+
+/* three column bits and one row bit in MCA address flip
+ * in bad page retirement
+ */
+#define RETIRE_FLIP_BITS_NUM 4
+
+struct amdgpu_umc_flip_bits {
+ uint32_t flip_bits_in_pa[RETIRE_FLIP_BITS_NUM];
+ uint32_t flip_row_bit;
+ uint32_t r13_in_pa;
+ uint32_t bit_num;
+};
+
+typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
+ uint32_t umc_inst, uint32_t ch_inst, void *data);
+
+struct amdgpu_umc_ras {
+ struct amdgpu_ras_block_object ras_block;
void (*err_cnt_init)(struct amdgpu_device *adev);
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- void (*query_ras_error_address)(struct amdgpu_device *adev,
+ bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
+ void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status);
+ bool (*check_ecc_err_status)(struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, void *ras_error_status);
+ int (*update_ecc_status)(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+ int (*convert_ras_err_addr)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr);
+ uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page);
+ void (*get_retire_flip_bits)(struct amdgpu_device *adev);
+ void (*mca_ipid_parse)(struct amdgpu_device *adev, uint64_t ipid,
+ uint32_t *did, uint32_t *ch, uint32_t *umc_inst, uint32_t *sid);
+};
+
+struct amdgpu_umc_funcs {
void (*init_registers)(struct amdgpu_device *adev);
};
@@ -52,21 +128,69 @@ struct amdgpu_umc {
uint32_t channel_inst_num;
/* number of umc instance with memory map register access */
uint32_t umc_inst_num;
+
+ /* Total number of umc node instance including harvest one */
+ uint32_t node_inst_num;
+
/* UMC regiser per channel offset */
uint32_t channel_offs;
+ /* how many pages are retired in one UE */
+ uint32_t retire_unit;
/* channel index table of interleaved memory */
const uint32_t *channel_idx_tbl;
struct ras_common_if *ras_if;
const struct amdgpu_umc_funcs *funcs;
+ struct amdgpu_umc_ras *ras;
+
+ /* active mask for umc node instance */
+ unsigned long active_mask;
+
+ struct amdgpu_umc_flip_bits flip_bits;
+
+ unsigned long err_addr_cnt;
};
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
-int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
- void *ras_error_status,
- struct amdgpu_iv_entry *entry);
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset);
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+ uint64_t err_addr,
+ uint64_t retired_page,
+ uint32_t channel_index,
+ uint32_t umc_inst);
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
+
+int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
+ umc_func func, void *data);
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err);
+
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status);
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr);
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len);
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr);
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
new file mode 100644
index 000000000000..5b27fc41ffbf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/ioctl.h>
+
+/*
+ * MMIO debugfs IOCTL structure
+ */
+struct amdgpu_debugfs_regs2_iocdata {
+ __u32 use_srbm, use_grbm, pg_lock;
+ struct {
+ __u32 se, sh, instance;
+ } grbm;
+ struct {
+ __u32 me, pipe, queue, vmid;
+ } srbm;
+};
+
+struct amdgpu_debugfs_regs2_iocdata_v2 {
+ __u32 use_srbm, use_grbm, pg_lock;
+ struct {
+ __u32 se, sh, instance;
+ } grbm;
+ struct {
+ __u32 me, pipe, queue, vmid;
+ } srbm;
+ u32 xcc_id;
+};
+
+struct amdgpu_debugfs_gprwave_iocdata {
+ u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id;
+ struct {
+ u32 thread, vpgr_or_sgpr;
+ } gpr;
+};
+
+/*
+ * MMIO debugfs state data (per file* handle)
+ */
+struct amdgpu_debugfs_regs2_data {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ struct amdgpu_debugfs_regs2_iocdata_v2 id;
+};
+
+struct amdgpu_debugfs_gprwave_data {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ struct amdgpu_debugfs_gprwave_iocdata id;
+};
+
+enum AMDGPU_DEBUGFS_REGS2_CMDS {
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE = 0,
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
+};
+
+enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
+ AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE = 0,
+};
+
+//reg2 interface
+#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata)
+#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2)
+
+//gprwave interface
+#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
new file mode 100644
index 000000000000..cd707d70a0bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_umsch_mm.h"
+#include "umsch_mm_v4_0.h"
+
+MODULE_FIRMWARE("amdgpu/umsch_mm_4_0_0.bin");
+
+int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws)
+{
+ struct amdgpu_ring *ring = &umsch->ring;
+
+ if (amdgpu_ring_alloc(ring, ndws))
+ return -ENOMEM;
+
+ amdgpu_ring_write_multiple(ring, pkt, ndws);
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+
+int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_ring *ring = &umsch->ring;
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, adev->usec_timeout);
+ if (r < 1) {
+ dev_err(adev->dev, "ring umsch timeout, emitted fence %u\n",
+ ring->fence_drv.sync_seq);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static void umsch_mm_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
+ else
+ WREG32(umsch->rb_wptr, ring->wptr << 2);
+}
+
+static u64 umsch_mm_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(umsch->rb_rptr);
+}
+
+static u64 umsch_mm_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(umsch->rb_wptr);
+}
+
+static const struct amdgpu_ring_funcs umsch_v4_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_UMSCH_MM,
+ .align_mask = 0,
+ .nop = 0,
+ .support_64bit_ptrs = false,
+ .get_rptr = umsch_mm_ring_get_rptr,
+ .get_wptr = umsch_mm_ring_get_wptr,
+ .set_wptr = umsch_mm_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm);
+ struct amdgpu_ring *ring = &umsch->ring;
+
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ ring->use_doorbell = true;
+ ring->no_scheduler = true;
+ ring->doorbell_index = (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1) + 6;
+
+ snprintf(ring->name, sizeof(ring->name), "umsch");
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
+{
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
+ struct amdgpu_device *adev = umsch->ring.adev;
+ const char *fw_name = NULL;
+ int r;
+
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ fw_name = "4_0_0";
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/umsch_mm_%s.bin", fw_name);
+ if (r) {
+ release_firmware(adev->umsch_mm.fw);
+ adev->umsch_mm.fw = NULL;
+ return r;
+ }
+
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)adev->umsch_mm.fw->data;
+
+ adev->umsch_mm.ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+ adev->umsch_mm.data_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+
+ adev->umsch_mm.irq_start_addr =
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_hi)) << 32);
+ adev->umsch_mm.uc_start_addr =
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_hi)) << 32);
+ adev->umsch_mm.data_start_addr =
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_hi)) << 32);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ struct amdgpu_firmware_info *info;
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_UCODE];
+ info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_UCODE;
+ info->fw = adev->umsch_mm.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_DATA];
+ info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_DATA;
+ info->fw = adev->umsch_mm.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes), PAGE_SIZE);
+ }
+
+ return 0;
+}
+
+int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch)
+{
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
+ struct amdgpu_device *adev = umsch->ring.adev;
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ int r;
+
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)
+ adev->umsch_mm.fw->data;
+
+ fw_data = (const __le32 *)(adev->umsch_mm.fw->data +
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_offset_bytes));
+ fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 4 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->umsch_mm.ucode_fw_obj,
+ &adev->umsch_mm.ucode_fw_gpu_addr,
+ (void **)&adev->umsch_mm.ucode_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create umsch_mm fw ucode bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->umsch_mm.ucode_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->umsch_mm.ucode_fw_obj);
+ amdgpu_bo_unreserve(adev->umsch_mm.ucode_fw_obj);
+ return 0;
+}
+
+int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch)
+{
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
+ struct amdgpu_device *adev = umsch->ring.adev;
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ int r;
+
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)
+ adev->umsch_mm.fw->data;
+
+ fw_data = (const __le32 *)(adev->umsch_mm.fw->data +
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->umsch_mm.data_fw_obj,
+ &adev->umsch_mm.data_fw_gpu_addr,
+ (void **)&adev->umsch_mm.data_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create umsch_mm fw data bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->umsch_mm.data_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->umsch_mm.data_fw_obj);
+ amdgpu_bo_unreserve(adev->umsch_mm.data_fw_obj);
+ return 0;
+}
+
+int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_device *adev = umsch->ring.adev;
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+ .mc_addr = adev->umsch_mm.cmd_buf_gpu_addr,
+ .ucode_size = ((uintptr_t)adev->umsch_mm.cmd_buf_curr_ptr -
+ (uintptr_t)adev->umsch_mm.cmd_buf_ptr),
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
+{
+ uint32_t umsch_mm_agdb_start;
+ int i;
+
+ umsch_mm_agdb_start = adev->doorbell_index.max_assignment + 1;
+ umsch_mm_agdb_start = roundup(umsch_mm_agdb_start, 1024);
+ umsch_mm_agdb_start += (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1);
+
+ for (i = 0; i < CONTEXT_PRIORITY_NUM_LEVELS; i++)
+ adev->umsch_mm.agdb_index[i] = umsch_mm_agdb_start + i;
+}
+
+static int umsch_mm_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ adev->umsch_mm.vmid_mask_mm_vpe = 0xf00;
+ adev->umsch_mm.engine_mask = (1 << UMSCH_SWIP_ENGINE_TYPE_VPE);
+ adev->umsch_mm.vpe_hqd_mask = 0xfe;
+
+ r = amdgpu_device_wb_get(adev, &adev->umsch_mm.wb_index);
+ if (r) {
+ dev_err(adev->dev, "failed to alloc wb for umsch: %d\n", r);
+ return r;
+ }
+
+ adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr +
+ (adev->umsch_mm.wb_index * 4);
+
+ r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->umsch_mm.cmd_buf_obj,
+ &adev->umsch_mm.cmd_buf_gpu_addr,
+ (void **)&adev->umsch_mm.cmd_buf_ptr);
+ if (r) {
+ dev_err(adev->dev, "failed to allocate cmdbuf bo %d\n", r);
+ amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
+ return r;
+ }
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_UMSCHFW_LOG_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->umsch_mm.dbglog_bo,
+ &adev->umsch_mm.log_gpu_addr,
+ &adev->umsch_mm.log_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate umsch debug bo\n", r);
+ return r;
+ }
+
+ mutex_init(&adev->umsch_mm.mutex_hidden);
+
+ umsch_mm_agdb_index_init(adev);
+
+ return 0;
+}
+
+
+static int umsch_mm_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ umsch_mm_v4_0_set_funcs(&adev->umsch_mm);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ adev->umsch_mm.ring.funcs = &umsch_v4_0_ring_funcs;
+ umsch_mm_set_regs(&adev->umsch_mm);
+
+ return 0;
+}
+
+static int umsch_mm_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend)
+ return 0;
+
+ return 0;
+}
+
+static int umsch_mm_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = umsch_mm_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_umsch_fwlog_init(&adev->umsch_mm);
+ r = umsch_mm_ring_init(&adev->umsch_mm);
+ if (r)
+ return r;
+
+ r = umsch_mm_init_microcode(&adev->umsch_mm);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int umsch_mm_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ release_firmware(adev->umsch_mm.fw);
+ adev->umsch_mm.fw = NULL;
+
+ amdgpu_ring_fini(&adev->umsch_mm.ring);
+
+ mutex_destroy(&adev->umsch_mm.mutex_hidden);
+
+ amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj,
+ &adev->umsch_mm.cmd_buf_gpu_addr,
+ (void **)&adev->umsch_mm.cmd_buf_ptr);
+
+ amdgpu_bo_free_kernel(&adev->umsch_mm.dbglog_bo,
+ &adev->umsch_mm.log_gpu_addr,
+ (void **)&adev->umsch_mm.log_cpu_addr);
+
+ amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
+
+ return 0;
+}
+
+static int umsch_mm_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = umsch_mm_load_microcode(&adev->umsch_mm);
+ if (r)
+ return r;
+
+ umsch_mm_ring_start(&adev->umsch_mm);
+
+ r = umsch_mm_set_hw_resources(&adev->umsch_mm);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int umsch_mm_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ umsch_mm_ring_stop(&adev->umsch_mm);
+
+ amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj,
+ &adev->umsch_mm.data_fw_gpu_addr,
+ (void **)&adev->umsch_mm.data_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj,
+ &adev->umsch_mm.ucode_fw_gpu_addr,
+ (void **)&adev->umsch_mm.ucode_fw_ptr);
+ return 0;
+}
+
+static int umsch_mm_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return umsch_mm_hw_fini(ip_block);
+}
+
+static int umsch_mm_resume(struct amdgpu_ip_block *ip_block)
+{
+ return umsch_mm_hw_init(ip_block);
+}
+
+void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm)
+{
+#if defined(CONFIG_DEBUG_FS)
+ void *fw_log_cpu_addr = umsch_mm->log_cpu_addr;
+ volatile struct amdgpu_umsch_fwlog *log_buf = fw_log_cpu_addr;
+
+ log_buf->header_size = sizeof(struct amdgpu_umsch_fwlog);
+ log_buf->buffer_size = AMDGPU_UMSCHFW_LOG_SIZE;
+ log_buf->rptr = log_buf->header_size;
+ log_buf->wptr = log_buf->header_size;
+ log_buf->wrapped = 0;
+#endif
+}
+
+/*
+ * debugfs for mapping umsch firmware log buffer.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static ssize_t amdgpu_debugfs_umsch_fwlog_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_umsch_mm *umsch_mm;
+ void *log_buf;
+ volatile struct amdgpu_umsch_fwlog *plog;
+ unsigned int read_pos, write_pos, available, i, read_bytes = 0;
+ unsigned int read_num[2] = {0};
+
+ umsch_mm = file_inode(f)->i_private;
+ if (!umsch_mm)
+ return -ENODEV;
+
+ if (!umsch_mm->log_cpu_addr)
+ return -EFAULT;
+
+ log_buf = umsch_mm->log_cpu_addr;
+
+ plog = (volatile struct amdgpu_umsch_fwlog *)log_buf;
+ read_pos = plog->rptr;
+ write_pos = plog->wptr;
+
+ if (read_pos > AMDGPU_UMSCHFW_LOG_SIZE || write_pos > AMDGPU_UMSCHFW_LOG_SIZE)
+ return -EFAULT;
+
+ if (!size || (read_pos == write_pos))
+ return 0;
+
+ if (write_pos > read_pos) {
+ available = write_pos - read_pos;
+ read_num[0] = min_t(size_t, size, available);
+ } else {
+ read_num[0] = AMDGPU_UMSCHFW_LOG_SIZE - read_pos;
+ available = read_num[0] + write_pos - plog->header_size;
+ if (size > available)
+ read_num[1] = write_pos - plog->header_size;
+ else if (size > read_num[0])
+ read_num[1] = size - read_num[0];
+ else
+ read_num[0] = size;
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (read_num[i]) {
+ if (read_pos == AMDGPU_UMSCHFW_LOG_SIZE)
+ read_pos = plog->header_size;
+ if (read_num[i] == copy_to_user((buf + read_bytes),
+ (log_buf + read_pos), read_num[i]))
+ return -EFAULT;
+
+ read_bytes += read_num[i];
+ read_pos += read_num[i];
+ }
+ }
+
+ plog->rptr = read_pos;
+ *pos += read_bytes;
+ return read_bytes;
+}
+
+static const struct file_operations amdgpu_debugfs_umschfwlog_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_umsch_fwlog_read,
+ .llseek = default_llseek
+};
+#endif
+
+void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
+ struct amdgpu_umsch_mm *umsch_mm)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ sprintf(name, "amdgpu_umsch_fwlog");
+ debugfs_create_file_size(name, S_IFREG | 0444, root, umsch_mm,
+ &amdgpu_debugfs_umschfwlog_fops,
+ AMDGPU_UMSCHFW_LOG_SIZE);
+#endif
+}
+
+static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
+ .name = "umsch_mm_v4_0",
+ .early_init = umsch_mm_early_init,
+ .late_init = umsch_mm_late_init,
+ .sw_init = umsch_mm_sw_init,
+ .sw_fini = umsch_mm_sw_fini,
+ .hw_init = umsch_mm_hw_init,
+ .hw_fini = umsch_mm_hw_fini,
+ .suspend = umsch_mm_suspend,
+ .resume = umsch_mm_resume,
+};
+
+const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_UMSCH_MM,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &umsch_mm_v4_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
new file mode 100644
index 000000000000..2c771a753778
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_UMSCH_MM_H__
+#define __AMDGPU_UMSCH_MM_H__
+
+enum UMSCH_SWIP_ENGINE_TYPE {
+ UMSCH_SWIP_ENGINE_TYPE_VCN0 = 0,
+ UMSCH_SWIP_ENGINE_TYPE_VCN1 = 1,
+ UMSCH_SWIP_ENGINE_TYPE_VCN = 2,
+ UMSCH_SWIP_ENGINE_TYPE_VPE = 3,
+ UMSCH_SWIP_ENGINE_TYPE_MAX
+};
+
+enum UMSCH_CONTEXT_PRIORITY_LEVEL {
+ CONTEXT_PRIORITY_LEVEL_IDLE = 0,
+ CONTEXT_PRIORITY_LEVEL_NORMAL = 1,
+ CONTEXT_PRIORITY_LEVEL_FOCUS = 2,
+ CONTEXT_PRIORITY_LEVEL_REALTIME = 3,
+ CONTEXT_PRIORITY_NUM_LEVELS
+};
+
+struct umsch_mm_set_resource_input {
+ uint32_t vmid_mask_mm_vcn;
+ uint32_t vmid_mask_mm_vpe;
+ uint32_t collaboration_mask_vpe;
+ uint32_t logging_vmid;
+ uint32_t engine_mask;
+ union {
+ struct {
+ uint32_t disable_reset : 1;
+ uint32_t disable_umsch_mm_log : 1;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 29;
+ };
+ uint32_t uint32_all;
+ };
+};
+
+struct amdgpu_umsch_fwlog {
+ uint32_t rptr;
+ uint32_t wptr;
+ uint32_t buffer_size;
+ uint32_t header_size;
+ uint32_t wrapped;
+};
+
+struct umsch_mm_add_queue_input {
+ uint32_t process_id;
+ uint64_t page_table_base_addr;
+ uint64_t process_va_start;
+ uint64_t process_va_end;
+ uint64_t process_quantum;
+ uint64_t process_csa_addr;
+ uint64_t context_quantum;
+ uint64_t context_csa_addr;
+ uint32_t inprocess_context_priority;
+ enum UMSCH_CONTEXT_PRIORITY_LEVEL context_global_priority_level;
+ uint32_t doorbell_offset_0;
+ uint32_t doorbell_offset_1;
+ enum UMSCH_SWIP_ENGINE_TYPE engine_type;
+ uint32_t affinity;
+ uint64_t mqd_addr;
+ uint64_t h_context;
+ uint64_t h_queue;
+ uint32_t vm_context_cntl;
+
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
+
+ struct {
+ uint32_t is_context_suspended : 1;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
+ };
+};
+
+struct umsch_mm_remove_queue_input {
+ uint32_t doorbell_offset_0;
+ uint32_t doorbell_offset_1;
+ uint64_t context_csa_addr;
+ uint32_t context_csa_array_index;
+};
+
+struct MQD_INFO {
+ uint32_t rb_base_hi;
+ uint32_t rb_base_lo;
+ uint32_t rb_size;
+ uint32_t wptr_val;
+ uint32_t rptr_val;
+ uint32_t unmapped;
+ uint32_t vmid;
+};
+
+struct amdgpu_umsch_mm;
+
+struct umsch_mm_funcs {
+ int (*set_hw_resources)(struct amdgpu_umsch_mm *umsch);
+ int (*add_queue)(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_add_queue_input *input);
+ int (*remove_queue)(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_remove_queue_input *input);
+ int (*set_regs)(struct amdgpu_umsch_mm *umsch);
+ int (*init_microcode)(struct amdgpu_umsch_mm *umsch);
+ int (*load_microcode)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_init)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_start)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_stop)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_fini)(struct amdgpu_umsch_mm *umsch);
+};
+
+struct amdgpu_umsch_mm {
+ struct amdgpu_ring ring;
+
+ uint32_t rb_wptr;
+ uint32_t rb_rptr;
+
+ const struct umsch_mm_funcs *funcs;
+
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_bo *ucode_fw_obj;
+ uint64_t ucode_fw_gpu_addr;
+ uint32_t *ucode_fw_ptr;
+ uint64_t irq_start_addr;
+ uint64_t uc_start_addr;
+ uint32_t ucode_size;
+
+ struct amdgpu_bo *data_fw_obj;
+ uint64_t data_fw_gpu_addr;
+ uint32_t *data_fw_ptr;
+ uint64_t data_start_addr;
+ uint32_t data_size;
+
+ struct amdgpu_bo *cmd_buf_obj;
+ uint64_t cmd_buf_gpu_addr;
+ uint32_t *cmd_buf_ptr;
+ uint32_t *cmd_buf_curr_ptr;
+
+ uint32_t wb_index;
+ uint64_t sch_ctx_gpu_addr;
+ uint32_t *sch_ctx_cpu_addr;
+
+ uint32_t vmid_mask_mm_vcn;
+ uint32_t vmid_mask_mm_vpe;
+ uint32_t engine_mask;
+ uint32_t vcn0_hqd_mask;
+ uint32_t vcn1_hqd_mask;
+ uint32_t vcn_hqd_mask[2];
+ uint32_t vpe_hqd_mask;
+ uint32_t agdb_index[CONTEXT_PRIORITY_NUM_LEVELS];
+
+ struct mutex mutex_hidden;
+ struct amdgpu_bo *dbglog_bo;
+ void *log_cpu_addr;
+ uint64_t log_gpu_addr;
+ uint32_t mem_size;
+ uint32_t log_offset;
+};
+
+int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws);
+int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch);
+int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch);
+int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch);
+
+void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
+ struct amdgpu_umsch_mm *umsch);
+
+void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm);
+
+#define WREG32_SOC15_UMSCH(reg, value) \
+ do { \
+ uint32_t reg_offset = adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + reg; \
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { \
+ *adev->umsch_mm.cmd_buf_curr_ptr++ = (reg_offset << 2); \
+ *adev->umsch_mm.cmd_buf_curr_ptr++ = value; \
+ } else { \
+ WREG32(reg_offset, value); \
+ } \
+ } while (0)
+
+#define umsch_mm_set_hw_resources(umsch) \
+ ((umsch)->funcs->set_hw_resources ? (umsch)->funcs->set_hw_resources((umsch)) : 0)
+#define umsch_mm_add_queue(umsch, input) \
+ ((umsch)->funcs->add_queue ? (umsch)->funcs->add_queue((umsch), (input)) : 0)
+#define umsch_mm_remove_queue(umsch, input) \
+ ((umsch)->funcs->remove_queue ? (umsch)->funcs->remove_queue((umsch), (input)) : 0)
+
+#define umsch_mm_set_regs(umsch) \
+ ((umsch)->funcs->set_regs ? (umsch)->funcs->set_regs((umsch)) : 0)
+#define umsch_mm_init_microcode(umsch) \
+ ((umsch)->funcs->init_microcode ? (umsch)->funcs->init_microcode((umsch)) : 0)
+#define umsch_mm_load_microcode(umsch) \
+ ((umsch)->funcs->load_microcode ? (umsch)->funcs->load_microcode((umsch)) : 0)
+
+#define umsch_mm_ring_init(umsch) \
+ ((umsch)->funcs->ring_init ? (umsch)->funcs->ring_init((umsch)) : 0)
+#define umsch_mm_ring_start(umsch) \
+ ((umsch)->funcs->ring_start ? (umsch)->funcs->ring_start((umsch)) : 0)
+#define umsch_mm_ring_stop(umsch) \
+ ((umsch)->funcs->ring_stop ? (umsch)->funcs->ring_stop((umsch)) : 0)
+#define umsch_mm_ring_fini(umsch) \
+ ((umsch)->funcs->ring_fini ? (umsch)->funcs->ring_fini((umsch)) : 0)
+
+static inline void amdgpu_umsch_mm_lock(struct amdgpu_umsch_mm *umsch)
+{
+ mutex_lock(&umsch->mutex_hidden);
+}
+
+static inline void amdgpu_umsch_mm_unlock(struct amdgpu_umsch_mm *umsch)
+{
+ mutex_unlock(&umsch->mutex_hidden);
+}
+
+extern const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
new file mode 100644
index 000000000000..9a969175900e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -0,0 +1,1482 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_auth.h>
+#include <drm/drm_exec.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_userq_fence.h"
+
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
+{
+ int i;
+ u32 userq_ip_mask = 0;
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
+ if (adev->userq_funcs[i])
+ userq_ip_mask |= (1 << i);
+ }
+
+ return userq_ip_mask;
+}
+
+static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev,
+ enum amdgpu_ring_type ring_type, int reset_type)
+{
+
+ if (ring_type < 0 || ring_type >= AMDGPU_RING_TYPE_MAX)
+ return false;
+
+ switch (ring_type) {
+ case AMDGPU_RING_TYPE_GFX:
+ if (adev->gfx.gfx_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ if (adev->gfx.compute_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ if (adev->sdma.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_DEC:
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ if (adev->vcn.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ if (adev->jpeg.supported_reset & reset_type)
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev)
+{
+ if (amdgpu_device_should_recover_gpu(adev)) {
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->userq_reset_work);
+ /* Wait for the reset job to complete */
+ flush_work(&adev->userq_reset_work);
+ }
+}
+
+static int
+amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const int queue_types[] = {
+ AMDGPU_RING_TYPE_COMPUTE,
+ AMDGPU_RING_TYPE_GFX,
+ AMDGPU_RING_TYPE_SDMA
+ };
+ const int num_queue_types = ARRAY_SIZE(queue_types);
+ bool gpu_reset = false;
+ int r = 0;
+ int i;
+
+ /* Warning if current process mutex is not held */
+ WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex));
+
+ if (unlikely(adev->debug_disable_gpu_ring_reset)) {
+ dev_err(adev->dev, "userq reset disabled by debug mask\n");
+ return 0;
+ }
+
+ /*
+ * If GPU recovery feature is disabled system-wide,
+ * skip all reset detection logic
+ */
+ if (!amdgpu_gpu_recovery)
+ return 0;
+
+ /*
+ * Iterate through all queue types to detect and reset problematic queues
+ * Process each queue type in the defined order
+ */
+ for (i = 0; i < num_queue_types; i++) {
+ int ring_type = queue_types[i];
+ const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type];
+
+ if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE))
+ continue;
+
+ if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 &&
+ funcs && funcs->detect_and_reset) {
+ r = funcs->detect_and_reset(adev, ring_type);
+ if (r) {
+ gpu_reset = true;
+ break;
+ }
+ }
+ }
+
+ if (gpu_reset)
+ amdgpu_userq_gpu_reset(adev);
+
+ return r;
+}
+
+static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
+ struct amdgpu_bo_va_mapping *va_map, u64 addr)
+{
+ struct amdgpu_userq_va_cursor *va_cursor;
+ struct userq_va_list;
+
+ va_cursor = kzalloc(sizeof(*va_cursor), GFP_KERNEL);
+ if (!va_cursor)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&va_cursor->list);
+ va_cursor->gpu_addr = addr;
+ atomic_set(&va_map->bo_va->userq_va_mapped, 1);
+ list_add(&va_cursor->list, &queue->userq_va_list);
+
+ return 0;
+}
+
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size)
+{
+ struct amdgpu_bo_va_mapping *va_map;
+ struct amdgpu_vm *vm = queue->vm;
+ u64 user_addr;
+ u64 size;
+ int r = 0;
+
+ user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+ size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!va_map) {
+ r = -EINVAL;
+ goto out_err;
+ }
+ /* Only validate the userq whether resident in the VM mapping range */
+ if (user_addr >= va_map->start &&
+ va_map->last - user_addr + 1 >= size) {
+ amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr);
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+ }
+
+ r = -EINVAL;
+out_err:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return r;
+}
+
+static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ bool r;
+
+ if (amdgpu_bo_reserve(vm->root.bo, false))
+ return false;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+ if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped))
+ r = true;
+ else
+ r = false;
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ return r;
+}
+
+static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_va_cursor *va_cursor, *tmp;
+ int r = 0;
+
+ list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
+ r += amdgpu_userq_buffer_va_mapped(queue->vm, va_cursor->gpu_addr);
+ dev_dbg(queue->userq_mgr->adev->dev,
+ "validate the userq mapping:%p va:%llx r:%d\n",
+ queue, va_cursor->gpu_addr, r);
+ }
+
+ if (r != 0)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_userq_va_cursor *va_cursor)
+{
+ atomic_set(&mapping->bo_va->userq_va_mapped, 0);
+ list_del(&va_cursor->list);
+ kfree(va_cursor);
+}
+
+static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_va_cursor *va_cursor, *tmp;
+ struct amdgpu_bo_va_mapping *mapping;
+ int r;
+
+ r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+ if (r)
+ return r;
+
+ list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr);
+ if (!mapping) {
+ r = -EINVAL;
+ goto err;
+ }
+ dev_dbg(adev->dev, "delete the userq:%p va:%llx\n",
+ queue, va_cursor->gpu_addr);
+ amdgpu_userq_buffer_va_list_del(mapping, va_cursor);
+ }
+err:
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ return r;
+}
+
+static int
+amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ bool found_hung_queue = false;
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ r = userq_funcs->preempt(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ found_hung_queue = true;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
+ }
+ }
+
+ if (found_hung_queue)
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
+ return r;
+}
+
+static int
+amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
+ r = userq_funcs->restore(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ bool found_hung_queue = false;
+ int r = 0;
+
+ if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
+ (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
+ r = userq_funcs->unmap(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ found_hung_queue = true;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
+ }
+ }
+
+ if (found_hung_queue)
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
+ return r;
+}
+
+static int
+amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
+ r = userq_funcs->map(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct dma_fence *f = queue->last_fence;
+ int ret = 0;
+
+ if (f && !dma_fence_is_signaled(f)) {
+ ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0) {
+ drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+ f->context, f->seqno);
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ return -ETIME;
+ }
+ }
+
+ return ret;
+}
+
+static void
+amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ int queue_id)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
+
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+
+ /* Drop the userq reference. */
+ amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ amdgpu_userq_fence_driver_free(queue);
+ /* Use interrupt-safe locking since IRQ handlers may access these XArrays */
+ xa_erase_irq(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id);
+ xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
+ queue->userq_mgr = NULL;
+ list_del(&queue->userq_va_list);
+ kfree(queue);
+
+ up_read(&adev->reset_domain->sem);
+}
+
+static struct amdgpu_usermode_queue *
+amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
+{
+ return xa_load(&uq_mgr->userq_mgr_xa, qid);
+}
+
+void
+amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+retry:
+ /* Flush any pending resume work to create ev_fence */
+ flush_delayed_work(&uq_mgr->resume_work);
+
+ mutex_lock(&uq_mgr->userq_mutex);
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+ if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
+ mutex_unlock(&uq_mgr->userq_mutex);
+ /*
+ * Looks like there was no pending resume work,
+ * add one now to create a valid eviction fence
+ */
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
+ goto retry;
+ }
+}
+
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ bp.type = ttm_bo_type_kernel;
+ bp.size = size;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &userq_obj->obj);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r);
+ return r;
+ }
+
+ r = amdgpu_bo_reserve(userq_obj->obj, true);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r);
+ goto free_obj;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
+ goto unresv;
+ }
+
+ r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
+ goto unresv;
+ }
+
+ userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
+ amdgpu_bo_unreserve(userq_obj->obj);
+ memset(userq_obj->cpu_ptr, 0, size);
+ return 0;
+
+unresv:
+ amdgpu_bo_unreserve(userq_obj->obj);
+
+free_obj:
+ amdgpu_bo_unref(&userq_obj->obj);
+ return r;
+}
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj)
+{
+ amdgpu_bo_kunmap(userq_obj->obj);
+ amdgpu_bo_unref(&userq_obj->obj);
+}
+
+uint64_t
+amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_db_info *db_info,
+ struct drm_file *filp)
+{
+ uint64_t index;
+ struct drm_gem_object *gobj;
+ struct amdgpu_userq_obj *db_obj = db_info->db_obj;
+ int r, db_size;
+
+ gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
+ if (gobj == NULL) {
+ drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
+ return -EINVAL;
+ }
+
+ db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ drm_gem_object_put(gobj);
+
+ r = amdgpu_bo_reserve(db_obj->obj, true);
+ if (r) {
+ drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
+ goto unref_bo;
+ }
+
+ /* Pin the BO before generating the index, unpin in queue destroy */
+ r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
+ if (r) {
+ drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
+ goto unresv_bo;
+ }
+
+ switch (db_info->queue_type) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_HW_IP_COMPUTE:
+ case AMDGPU_HW_IP_DMA:
+ db_size = sizeof(u64);
+ break;
+ default:
+ drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
+ db_info->queue_type);
+ r = -EINVAL;
+ goto unpin_bo;
+ }
+
+ index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
+ db_info->doorbell_offset, db_size);
+ drm_dbg_driver(adev_to_drm(uq_mgr->adev),
+ "[Usermode queues] doorbell index=%lld\n", index);
+ amdgpu_bo_unreserve(db_obj->obj);
+ return index;
+
+unpin_bo:
+ amdgpu_bo_unpin(db_obj->obj);
+unresv_bo:
+ amdgpu_bo_unreserve(db_obj->obj);
+unref_bo:
+ amdgpu_bo_unref(&db_obj->obj);
+ return r;
+}
+
+static int
+amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_usermode_queue *queue;
+ int r = 0;
+
+ cancel_delayed_work_sync(&uq_mgr->resume_work);
+ mutex_lock(&uq_mgr->userq_mutex);
+
+ queue = amdgpu_userq_find(uq_mgr, queue_id);
+ if (!queue) {
+ drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
+ mutex_unlock(&uq_mgr->userq_mutex);
+ return -EINVAL;
+ }
+ amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
+ r = amdgpu_bo_reserve(queue->db_obj.obj, true);
+ if (!r) {
+ amdgpu_bo_unpin(queue->db_obj.obj);
+ amdgpu_bo_unreserve(queue->db_obj.obj);
+ }
+ amdgpu_bo_unref(&queue->db_obj.obj);
+ atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
+#if defined(CONFIG_DEBUG_FS)
+ debugfs_remove_recursive(queue->debugfs_queue);
+#endif
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ /*TODO: It requires a reset for userq hw unmap error*/
+ if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
+ drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ }
+ amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
+ mutex_unlock(&uq_mgr->userq_mutex);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static int amdgpu_userq_priority_permit(struct drm_file *filp,
+ int priority)
+{
+ if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH)
+ return 0;
+
+ if (capable(CAP_SYS_NICE))
+ return 0;
+
+ if (drm_is_current_master(filp))
+ return 0;
+
+ return -EACCES;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
+{
+ struct amdgpu_usermode_queue *queue = m->private;
+ struct amdgpu_bo *bo;
+ int r;
+
+ if (!queue || !queue->mqd.obj)
+ return -EINVAL;
+
+ bo = amdgpu_bo_ref(queue->mqd.obj);
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ amdgpu_bo_unref(&bo);
+ return -EINVAL;
+ }
+
+ seq_printf(m, "queue_type: %d\n", queue->queue_type);
+ seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj));
+
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return 0;
+}
+
+static int amdgpu_mqd_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_mqd_info_read, inode->i_private);
+}
+
+static const struct file_operations amdgpu_mqd_info_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_mqd_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
+static int
+amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *uq_funcs;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_db_info db_info;
+ char *queue_name;
+ bool skip_map_queue;
+ u32 qid;
+ uint64_t index;
+ int r = 0;
+ int priority =
+ (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
+ AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
+
+ r = amdgpu_userq_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ /*
+ * There could be a situation that we are creating a new queue while
+ * the other queues under this UQ_mgr are suspended. So if there is any
+ * resume work pending, wait for it to get done.
+ *
+ * This will also make sure we have a valid eviction fence ready to be used.
+ */
+ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+ uq_funcs = adev->userq_funcs[args->in.ip_type];
+ if (!uq_funcs) {
+ drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
+ args->in.ip_type);
+ r = -EINVAL;
+ goto unlock;
+ }
+
+ queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
+ if (!queue) {
+ drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
+ r = -ENOMEM;
+ goto unlock;
+ }
+
+ INIT_LIST_HEAD(&queue->userq_va_list);
+ queue->doorbell_handle = args->in.doorbell_handle;
+ queue->queue_type = args->in.ip_type;
+ queue->vm = &fpriv->vm;
+ queue->priority = priority;
+
+ db_info.queue_type = queue->queue_type;
+ db_info.doorbell_handle = queue->doorbell_handle;
+ db_info.db_obj = &queue->db_obj;
+ db_info.doorbell_offset = args->in.doorbell_offset;
+
+ /* Validate the userq virtual address.*/
+ if (amdgpu_userq_input_va_validate(queue, args->in.queue_va, args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ r = -EINVAL;
+ kfree(queue);
+ goto unlock;
+ }
+
+ /* Convert relative doorbell offset into absolute doorbell index */
+ index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
+ if (index == (uint64_t)-EINVAL) {
+ drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
+ kfree(queue);
+ r = -EINVAL;
+ goto unlock;
+ }
+
+ queue->doorbell_index = index;
+ xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+ r = amdgpu_userq_fence_driver_alloc(adev, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
+ goto unlock;
+ }
+
+ r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to create Queue\n");
+ amdgpu_userq_fence_driver_free(queue);
+ kfree(queue);
+ goto unlock;
+ }
+
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+ r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
+ if (r) {
+ kfree(queue);
+ up_read(&adev->reset_domain->sem);
+ goto unlock;
+ }
+
+ r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
+ amdgpu_userq_fence_driver_free(queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ kfree(queue);
+ r = -ENOMEM;
+ up_read(&adev->reset_domain->sem);
+ goto unlock;
+ }
+ up_read(&adev->reset_domain->sem);
+ queue->userq_mgr = uq_mgr;
+
+ /* don't map the queue if scheduling is halted */
+ if (adev->userq_halt_for_enforce_isolation &&
+ ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
+ skip_map_queue = true;
+ else
+ skip_map_queue = false;
+ if (!skip_map_queue) {
+ r = amdgpu_userq_map_helper(uq_mgr, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to map Queue\n");
+ xa_erase(&uq_mgr->userq_mgr_xa, qid);
+ amdgpu_userq_fence_driver_free(queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ kfree(queue);
+ goto unlock;
+ }
+ }
+
+ queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid);
+ if (!queue_name) {
+ r = -ENOMEM;
+ goto unlock;
+ }
+
+#if defined(CONFIG_DEBUG_FS)
+ /* Queue dentry per client to hold MQD information */
+ queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client);
+ debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops);
+#endif
+ kfree(queue_name);
+
+ args->out.queue_id = qid;
+ atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
+
+unlock:
+ mutex_unlock(&uq_mgr->userq_mutex);
+
+ return r;
+}
+
+static int amdgpu_userq_input_args_validate(struct drm_device *dev,
+ union drm_amdgpu_userq *args,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
+ AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
+ return -EINVAL;
+ /* Usermode queues are only supported for GFX IP as of now */
+ if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
+ args->in.ip_type != AMDGPU_HW_IP_DMA &&
+ args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
+ drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
+ args->in.ip_type);
+ return -EINVAL;
+ }
+
+ if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
+ (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
+ (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
+ !amdgpu_is_tmz(adev)) {
+ drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
+ return -EINVAL;
+ }
+
+ if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
+ args->in.queue_va == 0 ||
+ args->in.queue_size == 0) {
+ drm_file_err(filp, "invalidate userq queue va or size\n");
+ return -EINVAL;
+ }
+ if (!args->in.wptr_va || !args->in.rptr_va) {
+ drm_file_err(filp, "invalidate userq queue rptr or wptr\n");
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_USERQ_OP_FREE:
+ if (args->in.ip_type ||
+ args->in.doorbell_handle ||
+ args->in.doorbell_offset ||
+ args->in.flags ||
+ args->in.queue_va ||
+ args->in.queue_size ||
+ args->in.rptr_va ||
+ args->in.wptr_va ||
+ args->in.mqd ||
+ args->in.mqd_size)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_userq *args = data;
+ int r;
+
+ if (amdgpu_userq_input_args_validate(dev, args, filp) < 0)
+ return -EINVAL;
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ r = amdgpu_userq_create(filp, args);
+ if (r)
+ drm_file_err(filp, "Failed to create usermode queue\n");
+ break;
+
+ case AMDGPU_USERQ_OP_FREE:
+ r = amdgpu_userq_destroy(filp, args->in.queue_id);
+ if (r)
+ drm_file_err(filp, "Failed to destroy usermode queue\n");
+ break;
+
+ default:
+ drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ /* Resume all the queues for this process */
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+
+ if (!amdgpu_userq_buffer_vas_mapped(queue)) {
+ drm_file_err(uq_mgr->file,
+ "trying restore queue without va mapping\n");
+ queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+ continue;
+ }
+
+ r = amdgpu_userq_restore_helper(uq_mgr, queue);
+ if (r)
+ ret = r;
+ }
+
+ if (ret)
+ drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
+ return ret;
+}
+
+static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/* Handle all BOs on the invalidated list, validate them and update the PTs */
+static int
+amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
+ struct amdgpu_vm *vm)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated,
+ struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_va->base.bo;
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
+ if (unlikely(ret))
+ return ret;
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ return ret;
+
+ /* This moves the bo_va to the done list */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret)
+ return ret;
+
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/* Make sure the whole VM is ready to be used */
+static int
+amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ bool invalidated = false, new_addition = false;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_hmm_range *range;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ unsigned long key, tmp_key;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ struct xarray xa;
+ int ret;
+
+ xa_init(&xa);
+
+retry_lock:
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ ret = amdgpu_vm_lock_pd(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ ret = amdgpu_vm_lock_done_list(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ /* This validates PDs, PTs and per VM BOs */
+ ret = amdgpu_vm_validate(adev, vm, NULL,
+ amdgpu_userq_validate_vm,
+ NULL);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ /* This locks and validates the remaining evicted BOs */
+ ret = amdgpu_userq_bo_validate(adev, &exec, vm);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+ }
+
+ if (invalidated) {
+ xa_for_each(&xa, tmp_key, range) {
+ bo = range->bo;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+ }
+ invalidated = false;
+ }
+
+ ret = amdgpu_vm_handle_moved(adev, vm, NULL);
+ if (ret)
+ goto unlock_all;
+
+ key = 0;
+ /* Validate User Ptr BOs */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+ bo = bo_va->base.bo;
+ if (!bo)
+ continue;
+
+ if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
+ continue;
+
+ range = xa_load(&xa, key);
+ if (range && range->bo != bo) {
+ xa_erase(&xa, key);
+ amdgpu_hmm_range_free(range);
+ range = NULL;
+ }
+
+ if (!range) {
+ range = amdgpu_hmm_range_alloc(bo);
+ if (!range) {
+ ret = -ENOMEM;
+ goto unlock_all;
+ }
+
+ xa_store(&xa, key, range, GFP_KERNEL);
+ new_addition = true;
+ }
+ key++;
+ }
+
+ if (new_addition) {
+ drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->bo;
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (ret)
+ goto unlock_all;
+ }
+
+ invalidated = true;
+ new_addition = false;
+ goto retry_lock;
+ }
+
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
+ if (ret)
+ goto unlock_all;
+
+ /*
+ * We need to wait for all VM updates to finish before restarting the
+ * queues. Using the done list like that is now ok since everything is
+ * locked in place.
+ */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status)
+ dma_fence_wait(bo_va->last_pt_update, false);
+ dma_fence_wait(vm->last_update, false);
+
+ ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+ if (ret)
+ drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
+
+unlock_all:
+ drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->bo;
+ amdgpu_hmm_range_free(range);
+ }
+ xa_destroy(&xa);
+ return ret;
+}
+
+static void amdgpu_userq_restore_worker(struct work_struct *work)
+{
+ struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ int ret;
+
+ flush_delayed_work(&fpriv->evf_mgr.suspend_work);
+
+ mutex_lock(&uq_mgr->userq_mutex);
+
+ ret = amdgpu_userq_vm_validate(uq_mgr);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
+ goto unlock;
+ }
+
+ ret = amdgpu_userq_restore_all(uq_mgr);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
+ goto unlock;
+ }
+
+unlock:
+ mutex_unlock(&uq_mgr->userq_mutex);
+}
+
+static int
+amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ /* Try to unmap all the queues in this process ctx */
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+ r = amdgpu_userq_preempt_helper(uq_mgr, queue);
+ if (r)
+ ret = r;
+ }
+
+ if (ret)
+ drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
+ return ret;
+}
+
+void amdgpu_userq_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ userq_reset_work);
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USERQ;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
+static int
+amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+ int ret;
+
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+ struct dma_fence *f = queue->last_fence;
+
+ if (!f || dma_fence_is_signaled(f))
+ continue;
+ ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
+ if (ret <= 0) {
+ drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+ f->context, f->seqno);
+ return -ETIMEDOUT;
+ }
+ }
+
+ return 0;
+}
+
+void
+amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence)
+{
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ int ret;
+
+ /* Wait for any pending userqueue fence work to finish */
+ ret = amdgpu_userq_wait_for_signal(uq_mgr);
+ if (ret)
+ dev_err(adev->dev, "Not evicting userqueue, timeout waiting for work\n");
+
+ ret = amdgpu_userq_evict_all(uq_mgr);
+ if (ret)
+ dev_err(adev->dev, "Failed to evict userqueue\n");
+
+ /* Signal current eviction fence */
+ amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
+
+ if (evf_mgr->fd_closing) {
+ cancel_delayed_work_sync(&uq_mgr->resume_work);
+ return;
+ }
+
+ /* Schedule a resume work */
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
+}
+
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev)
+{
+ mutex_init(&userq_mgr->userq_mutex);
+ xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC);
+ userq_mgr->adev = adev;
+ userq_mgr->file = file_priv;
+
+ INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
+ return 0;
+}
+
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+
+ cancel_delayed_work_sync(&userq_mgr->resume_work);
+
+ mutex_lock(&userq_mgr->userq_mutex);
+ amdgpu_userq_detect_and_reset_queues(userq_mgr);
+ xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) {
+ amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
+ amdgpu_userq_unmap_helper(userq_mgr, queue);
+ amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
+ }
+
+ xa_destroy(&userq_mgr->userq_mgr_xa);
+ mutex_unlock(&userq_mgr->userq_mutex);
+ mutex_destroy(&userq_mgr->userq_mutex);
+}
+
+int amdgpu_userq_suspend(struct amdgpu_device *adev)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
+
+ if (!ip_mask)
+ return 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ guard(mutex)(&uqm->userq_mutex);
+ amdgpu_userq_detect_and_reset_queues(uqm);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ else
+ r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+int amdgpu_userq_resume(struct amdgpu_device *adev)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
+
+ if (!ip_mask)
+ return 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ guard(mutex)(&uqm->userq_mutex);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ else
+ r = amdgpu_userq_map_helper(uqm, queue);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ /* only need to stop gfx/compute */
+ if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
+ return 0;
+
+ if (adev->userq_halt_for_enforce_isolation)
+ dev_warn(adev->dev, "userq scheduling already stopped!\n");
+ adev->userq_halt_for_enforce_isolation = true;
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ mutex_lock(&uqm->userq_mutex);
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ amdgpu_userq_detect_and_reset_queues(uqm);
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+
+ return ret;
+}
+
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ /* only need to stop gfx/compute */
+ if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
+ return 0;
+
+ if (!adev->userq_halt_for_enforce_isolation)
+ dev_warn(adev->dev, "userq scheduling already started!\n");
+ adev->userq_halt_for_enforce_isolation = false;
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ mutex_lock(&uqm->userq_mutex);
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+
+ return ret;
+}
+
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_bo_va *bo_va = mapping->bo_va;
+ struct dma_resv *resv = bo_va->base.bo->tbo.base.resv;
+ int ret = 0;
+
+ if (!ip_mask)
+ return 0;
+
+ dev_warn_once(adev->dev, "now unmapping a vital queue va:%llx\n", saddr);
+ /**
+ * The userq VA mapping reservation should include the eviction fence,
+ * if the eviction fence can't signal successfully during unmapping,
+ * then driver will warn to flag this improper unmap of the userq VA.
+ * Note: The eviction fence may be attached to different BOs, and this
+ * unmap is only for one kind of userq VAs, so at this point suppose
+ * the eviction fence is always unsignaled.
+ */
+ if (!dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) {
+ ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
+{
+ const struct amdgpu_userq_funcs *userq_funcs;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ amdgpu_userq_wait_for_last_fence(uqm, queue);
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ userq_funcs->unmap(uqm, queue);
+ /* just mark all queues as hung at this point.
+ * if unmap succeeds, we could map again
+ * in amdgpu_userq_post_reset() if vram is not lost
+ */
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_fence_driver_force_completion(queue);
+ }
+ }
+}
+
+int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost)
+{
+ /* if any queue state is AMDGPU_USERQ_STATE_UNMAPPED
+ * at this point, we should be able to map it again
+ * and continue if vram is not lost.
+ */
+ struct amdgpu_userq_mgr *uqm;
+ struct amdgpu_usermode_queue *queue;
+ const struct amdgpu_userq_funcs *userq_funcs;
+ unsigned long queue_id;
+ int r = 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) {
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ /* Re-map queue */
+ r = userq_funcs->map(uqm, queue);
+ if (r) {
+ dev_err(adev->dev, "Failed to remap queue %ld\n", queue_id);
+ continue;
+ }
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
new file mode 100644
index 000000000000..c37444427a14
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_USERQ_H_
+#define AMDGPU_USERQ_H_
+#include "amdgpu_eviction_fence.h"
+
+#define AMDGPU_MAX_USERQ_COUNT 512
+
+#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
+#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
+#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
+
+enum amdgpu_userq_state {
+ AMDGPU_USERQ_STATE_UNMAPPED = 0,
+ AMDGPU_USERQ_STATE_MAPPED,
+ AMDGPU_USERQ_STATE_PREEMPTED,
+ AMDGPU_USERQ_STATE_HUNG,
+ AMDGPU_USERQ_STATE_INVALID_VA,
+};
+
+struct amdgpu_mqd_prop;
+
+struct amdgpu_userq_obj {
+ void *cpu_ptr;
+ uint64_t gpu_addr;
+ struct amdgpu_bo *obj;
+};
+
+struct amdgpu_userq_va_cursor {
+ u64 gpu_addr;
+ struct list_head list;
+};
+
+struct amdgpu_usermode_queue {
+ int queue_type;
+ enum amdgpu_userq_state state;
+ uint64_t doorbell_handle;
+ uint64_t doorbell_index;
+ uint64_t flags;
+ struct amdgpu_mqd_prop *userq_prop;
+ struct amdgpu_userq_mgr *userq_mgr;
+ struct amdgpu_vm *vm;
+ struct amdgpu_userq_obj mqd;
+ struct amdgpu_userq_obj db_obj;
+ struct amdgpu_userq_obj fw_obj;
+ struct amdgpu_userq_obj wptr_obj;
+ struct xarray fence_drv_xa;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *last_fence;
+ u32 xcp_id;
+ int priority;
+ struct dentry *debugfs_queue;
+
+ struct list_head userq_va_list;
+};
+
+struct amdgpu_userq_funcs {
+ int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args,
+ struct amdgpu_usermode_queue *queue);
+ void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *uq);
+ int (*unmap)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*map)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*preempt)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*restore)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*detect_and_reset)(struct amdgpu_device *adev,
+ int queue_type);
+};
+
+/* Usermode queues for gfx */
+struct amdgpu_userq_mgr {
+ /**
+ * @userq_mgr_xa: Per-process user queue map (queue ID → queue)
+ * Key: queue_id (unique ID within the process's userq manager)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_mgr_xa;
+ struct mutex userq_mutex;
+ struct amdgpu_device *adev;
+ struct delayed_work resume_work;
+ struct drm_file *file;
+ atomic_t userq_count[AMDGPU_RING_TYPE_MAX];
+};
+
+struct amdgpu_db_info {
+ uint64_t doorbell_handle;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ struct amdgpu_userq_obj *db_obj;
+};
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev);
+
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
+
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size);
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj);
+
+void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_db_info *db_info,
+ struct drm_file *filp);
+
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev);
+
+int amdgpu_userq_suspend(struct amdgpu_device *adev);
+int amdgpu_userq_resume(struct amdgpu_device *adev);
+
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+void amdgpu_userq_reset_work(struct work_struct *work);
+void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
+int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
+
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size);
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
new file mode 100644
index 000000000000..eba9fb359047
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -0,0 +1,1011 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/dma-fence-unwrap.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_syncobj.h>
+
+#include "amdgpu.h"
+#include "amdgpu_userq_fence.h"
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops;
+static struct kmem_cache *amdgpu_userq_fence_slab;
+
+int amdgpu_userq_fence_slab_init(void)
+{
+ amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
+ sizeof(struct amdgpu_userq_fence),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!amdgpu_userq_fence_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void amdgpu_userq_fence_slab_fini(void)
+{
+ rcu_barrier();
+ kmem_cache_destroy(amdgpu_userq_fence_slab);
+}
+
+static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
+{
+ if (!f || f->ops != &amdgpu_userq_fence_ops)
+ return NULL;
+
+ return container_of(f, struct amdgpu_userq_fence, base);
+}
+
+static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ return le64_to_cpu(*fence_drv->cpu_addr);
+}
+
+static void
+amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv,
+ u64 seq)
+{
+ if (fence_drv->cpu_addr)
+ *fence_drv->cpu_addr = cpu_to_le64(seq);
+}
+
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ unsigned long flags;
+ int r;
+
+ fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
+ if (!fence_drv)
+ return -ENOMEM;
+
+ /* Acquire seq64 memory */
+ r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
+ &fence_drv->cpu_addr);
+ if (r)
+ goto free_fence_drv;
+
+ memset(fence_drv->cpu_addr, 0, sizeof(u64));
+
+ kref_init(&fence_drv->refcount);
+ INIT_LIST_HEAD(&fence_drv->fences);
+ spin_lock_init(&fence_drv->fence_list_lock);
+
+ fence_drv->adev = adev;
+ fence_drv->context = dma_fence_context_alloc(1);
+ get_task_comm(fence_drv->timeline_name, current);
+
+ xa_lock_irqsave(&adev->userq_xa, flags);
+ r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
+ fence_drv, GFP_KERNEL));
+ xa_unlock_irqrestore(&adev->userq_xa, flags);
+ if (r)
+ goto free_seq64;
+
+ userq->fence_drv = fence_drv;
+
+ return 0;
+
+free_seq64:
+ amdgpu_seq64_free(adev, fence_drv->va);
+free_fence_drv:
+ kfree(fence_drv);
+
+ return r;
+}
+
+static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ unsigned long index;
+
+ if (xa_empty(xa))
+ return;
+
+ xa_lock(xa);
+ xa_for_each(xa, index, fence_drv) {
+ __xa_erase(xa, index);
+ amdgpu_userq_fence_driver_put(fence_drv);
+ }
+
+ xa_unlock(xa);
+}
+
+void
+amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
+{
+ amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
+ xa_destroy(&userq->fence_drv_xa);
+ /* Drop the fence_drv reference held by user queue */
+ amdgpu_userq_fence_driver_put(userq->fence_drv);
+}
+
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ struct amdgpu_userq_fence *userq_fence, *tmp;
+ struct dma_fence *fence;
+ unsigned long flags;
+ u64 rptr;
+ int i;
+
+ if (!fence_drv)
+ return;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ rptr = amdgpu_userq_fence_read(fence_drv);
+
+ list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
+ fence = &userq_fence->base;
+
+ if (rptr < fence->seqno)
+ break;
+
+ dma_fence_signal(fence);
+
+ for (i = 0; i < userq_fence->fence_drv_array_count; i++)
+ amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
+
+ list_del(&userq_fence->link);
+ dma_fence_put(fence);
+ }
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+}
+
+void amdgpu_userq_fence_driver_destroy(struct kref *ref)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
+ struct amdgpu_userq_fence_driver,
+ refcount);
+ struct amdgpu_userq_fence_driver *xa_fence_drv;
+ struct amdgpu_device *adev = fence_drv->adev;
+ struct amdgpu_userq_fence *fence, *tmp;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long index, flags;
+ struct dma_fence *f;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
+ f = &fence->base;
+
+ if (!dma_fence_is_signaled(f)) {
+ dma_fence_set_error(f, -ECANCELED);
+ dma_fence_signal(f);
+ }
+
+ list_del(&fence->link);
+ dma_fence_put(f);
+ }
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ xa_lock_irqsave(xa, flags);
+ xa_for_each(xa, index, xa_fence_drv)
+ if (xa_fence_drv == fence_drv)
+ __xa_erase(xa, index);
+ xa_unlock_irqrestore(xa, flags);
+
+ /* Free seq64 memory */
+ amdgpu_seq64_free(adev, fence_drv->va);
+ kfree(fence_drv);
+}
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_get(&fence_drv->refcount);
+}
+
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
+}
+
+static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
+{
+ *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
+ return *userq_fence ? 0 : -ENOMEM;
+}
+
+static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence *userq_fence,
+ u64 seq, struct dma_fence **f)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *fence;
+ unsigned long flags;
+
+ fence_drv = userq->fence_drv;
+ if (!fence_drv)
+ return -EINVAL;
+
+ spin_lock_init(&userq_fence->lock);
+ INIT_LIST_HEAD(&userq_fence->link);
+ fence = &userq_fence->base;
+ userq_fence->fence_drv = fence_drv;
+
+ dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+ fence_drv->context, seq);
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+ dma_fence_get(fence);
+
+ if (!xa_empty(&userq->fence_drv_xa)) {
+ struct amdgpu_userq_fence_driver *stored_fence_drv;
+ unsigned long index, count = 0;
+ int i = 0;
+
+ xa_lock(&userq->fence_drv_xa);
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
+ count++;
+
+ userq_fence->fence_drv_array =
+ kvmalloc_array(count,
+ sizeof(struct amdgpu_userq_fence_driver *),
+ GFP_ATOMIC);
+
+ if (userq_fence->fence_drv_array) {
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
+ userq_fence->fence_drv_array[i] = stored_fence_drv;
+ __xa_erase(&userq->fence_drv_xa, index);
+ i++;
+ }
+ }
+
+ userq_fence->fence_drv_array_count = i;
+ xa_unlock(&userq->fence_drv_xa);
+ } else {
+ userq_fence->fence_drv_array = NULL;
+ userq_fence->fence_drv_array_count = 0;
+ }
+
+ /* Check if hardware has already processed the job */
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ if (!dma_fence_is_signaled(fence))
+ list_add_tail(&userq_fence->link, &fence_drv->fences);
+ else
+ dma_fence_put(fence);
+
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ *f = fence;
+
+ return 0;
+}
+
+static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_userq_fence";
+}
+
+static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+
+ return fence->fence_drv->timeline_name;
+}
+
+static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ u64 rptr, wptr;
+
+ rptr = amdgpu_userq_fence_read(fence_drv);
+ wptr = fence->base.seqno;
+
+ if (rptr >= wptr)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_userq_fence_free(struct rcu_head *rcu)
+{
+ struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
+ struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
+ struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
+
+ /* Release the fence driver reference */
+ amdgpu_userq_fence_driver_put(fence_drv);
+
+ kvfree(userq_fence->fence_drv_array);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+}
+
+static void amdgpu_userq_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_userq_fence_free);
+}
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops = {
+ .get_driver_name = amdgpu_userq_fence_get_driver_name,
+ .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
+ .signaled = amdgpu_userq_fence_signaled,
+ .release = amdgpu_userq_fence_release,
+};
+
+/**
+ * amdgpu_userq_fence_read_wptr - Read the userq wptr value
+ *
+ * @queue: user mode queue structure pointer
+ * @wptr: write pointer value
+ *
+ * Read the wptr value from userq's MQD. The userq signal IOCTL
+ * creates a dma_fence for the shared buffers that expects the
+ * RPTR value written to seq64 memory >= WPTR.
+ *
+ * Returns wptr value on success, error on failure.
+ */
+static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
+ u64 *wptr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ u64 addr, *ptr;
+ int r;
+
+ r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+ if (r)
+ return r;
+
+ addr = queue->userq_prop->wptr_gpu_addr;
+ addr &= AMDGPU_GMC_HOLE_MASK;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
+ if (!mapping) {
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
+ return -EINVAL;
+ }
+
+ bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ amdgpu_bo_unref(&bo);
+ DRM_ERROR("Failed to reserve userqueue wptr bo");
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(bo, (void **)&ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the userqueue wptr bo");
+ goto map_error;
+ }
+
+ *wptr = le64_to_cpu(*ptr);
+
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return 0;
+
+map_error:
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return r;
+}
+
+static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
+{
+ dma_fence_put(fence);
+}
+
+static void
+amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence,
+ int error)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ unsigned long flags;
+ struct dma_fence *f;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+
+ f = rcu_dereference_protected(&fence->base,
+ lockdep_is_held(&fence_drv->fence_list_lock));
+ if (f && !dma_fence_is_signaled_locked(f))
+ dma_fence_set_error(f, error);
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+}
+
+void
+amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq)
+{
+ struct dma_fence *f = userq->last_fence;
+
+ if (f) {
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ u64 wptr = fence->base.seqno;
+
+ amdgpu_userq_fence_driver_set_error(fence, -ECANCELED);
+ amdgpu_userq_fence_write(fence_drv, wptr);
+ amdgpu_userq_fence_driver_process(fence_drv);
+
+ }
+}
+
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+ struct drm_amdgpu_userq_signal *args = data;
+ struct drm_gem_object **gobj_write = NULL;
+ struct drm_gem_object **gobj_read = NULL;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_fence *userq_fence;
+ struct drm_syncobj **syncobj = NULL;
+ u32 *bo_handles_write, num_write_bo_handles;
+ u32 *syncobj_handles, num_syncobj_handles;
+ u32 *bo_handles_read, num_read_bo_handles;
+ int r, i, entry, rentry, wentry;
+ struct dma_fence *fence;
+ struct drm_exec exec;
+ u64 wptr;
+
+ num_syncobj_handles = args->num_syncobj_handles;
+ syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
+ size_mul(sizeof(u32), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ /* Array of pointers to the looked up syncobjs */
+ syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
+ if (!syncobj) {
+ r = -ENOMEM;
+ goto free_syncobj_handles;
+ }
+
+ for (entry = 0; entry < num_syncobj_handles; entry++) {
+ syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
+ if (!syncobj[entry]) {
+ r = -ENOENT;
+ goto free_syncobj;
+ }
+ }
+
+ num_read_bo_handles = args->num_bo_read_handles;
+ bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
+ sizeof(u32) * num_read_bo_handles);
+ if (IS_ERR(bo_handles_read)) {
+ r = PTR_ERR(bo_handles_read);
+ goto free_syncobj;
+ }
+
+ /* Array of pointers to the GEM read objects */
+ gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+ if (!gobj_read) {
+ r = -ENOMEM;
+ goto free_bo_handles_read;
+ }
+
+ for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+ gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+ if (!gobj_read[rentry]) {
+ r = -ENOENT;
+ goto put_gobj_read;
+ }
+ }
+
+ num_write_bo_handles = args->num_bo_write_handles;
+ bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
+ sizeof(u32) * num_write_bo_handles);
+ if (IS_ERR(bo_handles_write)) {
+ r = PTR_ERR(bo_handles_write);
+ goto put_gobj_read;
+ }
+
+ /* Array of pointers to the GEM write objects */
+ gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+ if (!gobj_write) {
+ r = -ENOMEM;
+ goto free_bo_handles_write;
+ }
+
+ for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+ gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+ if (!gobj_write[wentry]) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+ }
+
+ /* Retrieve the user queue */
+ queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id);
+ if (!queue) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+
+ r = amdgpu_userq_fence_read_wptr(queue, &wptr);
+ if (r)
+ goto put_gobj_write;
+
+ r = amdgpu_userq_fence_alloc(&userq_fence);
+ if (r)
+ goto put_gobj_write;
+
+ /* We are here means UQ is active, make sure the eviction fence is valid */
+ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+ /* Create a new fence */
+ r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
+ if (r) {
+ mutex_unlock(&userq_mgr->userq_mutex);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+ goto put_gobj_write;
+ }
+
+ dma_fence_put(queue->last_fence);
+ queue->last_fence = dma_fence_get(fence);
+ mutex_unlock(&userq_mgr->userq_mutex);
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ (num_read_bo_handles + num_write_bo_handles));
+
+ /* Lock all BOs with retry handling */
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ amdgpu_userq_fence_cleanup(fence);
+ goto exec_fini;
+ }
+
+ r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ amdgpu_userq_fence_cleanup(fence);
+ goto exec_fini;
+ }
+ }
+
+ for (i = 0; i < num_read_bo_handles; i++) {
+ if (!gobj_read || !gobj_read[i]->resv)
+ continue;
+
+ dma_resv_add_fence(gobj_read[i]->resv, fence,
+ DMA_RESV_USAGE_READ);
+ }
+
+ for (i = 0; i < num_write_bo_handles; i++) {
+ if (!gobj_write || !gobj_write[i]->resv)
+ continue;
+
+ dma_resv_add_fence(gobj_write[i]->resv, fence,
+ DMA_RESV_USAGE_WRITE);
+ }
+
+ /* Add the created fence to syncobj/BO's */
+ for (i = 0; i < num_syncobj_handles; i++)
+ drm_syncobj_replace_fence(syncobj[i], fence);
+
+ /* drop the reference acquired in fence creation function */
+ dma_fence_put(fence);
+
+exec_fini:
+ drm_exec_fini(&exec);
+put_gobj_write:
+ while (wentry-- > 0)
+ drm_gem_object_put(gobj_write[wentry]);
+ kfree(gobj_write);
+free_bo_handles_write:
+ kfree(bo_handles_write);
+put_gobj_read:
+ while (rentry-- > 0)
+ drm_gem_object_put(gobj_read[rentry]);
+ kfree(gobj_read);
+free_bo_handles_read:
+ kfree(bo_handles_read);
+free_syncobj:
+ while (entry-- > 0)
+ if (syncobj[entry])
+ drm_syncobj_put(syncobj[entry]);
+ kfree(syncobj);
+free_syncobj_handles:
+ kfree(syncobj_handles);
+
+ return r;
+}
+
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
+ u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
+ struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+ struct drm_amdgpu_userq_wait *wait_info = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_usermode_queue *waitq;
+ struct drm_gem_object **gobj_write;
+ struct drm_gem_object **gobj_read;
+ struct dma_fence **fences = NULL;
+ u16 num_points, num_fences = 0;
+ int r, i, rentry, wentry, cnt;
+ struct drm_exec exec;
+
+ num_read_bo_handles = wait_info->num_bo_read_handles;
+ bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
+ size_mul(sizeof(u32), num_read_bo_handles));
+ if (IS_ERR(bo_handles_read))
+ return PTR_ERR(bo_handles_read);
+
+ num_write_bo_handles = wait_info->num_bo_write_handles;
+ bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
+ size_mul(sizeof(u32), num_write_bo_handles));
+ if (IS_ERR(bo_handles_write)) {
+ r = PTR_ERR(bo_handles_write);
+ goto free_bo_handles_read;
+ }
+
+ num_syncobj = wait_info->num_syncobj_handles;
+ syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
+ size_mul(sizeof(u32), num_syncobj));
+ if (IS_ERR(syncobj_handles)) {
+ r = PTR_ERR(syncobj_handles);
+ goto free_bo_handles_write;
+ }
+
+ num_points = wait_info->num_syncobj_timeline_handles;
+ timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
+ sizeof(u32) * num_points);
+ if (IS_ERR(timeline_handles)) {
+ r = PTR_ERR(timeline_handles);
+ goto free_syncobj_handles;
+ }
+
+ timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
+ sizeof(u32) * num_points);
+ if (IS_ERR(timeline_points)) {
+ r = PTR_ERR(timeline_points);
+ goto free_timeline_handles;
+ }
+
+ gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+ if (!gobj_read) {
+ r = -ENOMEM;
+ goto free_timeline_points;
+ }
+
+ for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+ gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+ if (!gobj_read[rentry]) {
+ r = -ENOENT;
+ goto put_gobj_read;
+ }
+ }
+
+ gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+ if (!gobj_write) {
+ r = -ENOMEM;
+ goto put_gobj_read;
+ }
+
+ for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+ gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+ if (!gobj_write[wentry]) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+ }
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ (num_read_bo_handles + num_write_bo_handles));
+
+ /* Lock all BOs with retry handling */
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ drm_exec_fini(&exec);
+ goto put_gobj_write;
+ }
+
+ r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ drm_exec_fini(&exec);
+ goto put_gobj_write;
+ }
+ }
+
+ if (!wait_info->num_fences) {
+ if (num_points) {
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence;
+ struct dma_fence *f;
+
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto exec_fini;
+
+ dma_fence_unwrap_for_each(f, &iter, fence)
+ num_fences++;
+
+ dma_fence_put(fence);
+ }
+ }
+
+ /* Count syncobj's fence */
+ for (i = 0; i < num_syncobj; i++) {
+ struct dma_fence *fence;
+
+ r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+ 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto exec_fini;
+
+ num_fences++;
+ dma_fence_put(fence);
+ }
+
+ /* Count GEM objects fence */
+ for (i = 0; i < num_read_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+ DMA_RESV_USAGE_READ, fence)
+ num_fences++;
+ }
+
+ for (i = 0; i < num_write_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+ DMA_RESV_USAGE_WRITE, fence)
+ num_fences++;
+ }
+
+ /*
+ * Passing num_fences = 0 means that userspace doesn't want to
+ * retrieve userq_fence_info. If num_fences = 0 we skip filling
+ * userq_fence_info and return the actual number of fences on
+ * args->num_fences.
+ */
+ wait_info->num_fences = num_fences;
+ } else {
+ /* Array of fence info */
+ fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
+ if (!fence_info) {
+ r = -ENOMEM;
+ goto exec_fini;
+ }
+
+ /* Array of fences */
+ fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
+ if (!fences) {
+ r = -ENOMEM;
+ goto free_fence_info;
+ }
+
+ /* Retrieve GEM read objects fence */
+ for (i = 0; i < num_read_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+ DMA_RESV_USAGE_READ, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ dma_fence_get(fence);
+ }
+ }
+
+ /* Retrieve GEM write objects fence */
+ for (i = 0; i < num_write_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+ DMA_RESV_USAGE_WRITE, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ dma_fence_get(fence);
+ }
+ }
+
+ if (num_points) {
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence;
+ struct dma_fence *f;
+
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ dma_fence_unwrap_for_each(f, &iter, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ dma_fence_get(f);
+ fences[num_fences++] = f;
+ }
+
+ dma_fence_put(fence);
+ }
+ }
+
+ /* Retrieve syncobj's fence */
+ for (i = 0; i < num_syncobj; i++) {
+ struct dma_fence *fence;
+
+ r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+ 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ }
+
+ /*
+ * Keep only the latest fences to reduce the number of values
+ * given back to userspace.
+ */
+ num_fences = dma_fence_dedup_array(fences, num_fences);
+
+ waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id);
+ if (!waitq) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ for (i = 0, cnt = 0; i < num_fences; i++) {
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence *userq_fence;
+ u32 index;
+
+ userq_fence = to_amdgpu_userq_fence(fences[i]);
+ if (!userq_fence) {
+ /*
+ * Just waiting on other driver fences should
+ * be good for now
+ */
+ r = dma_fence_wait(fences[i], true);
+ if (r) {
+ dma_fence_put(fences[i]);
+ goto free_fences;
+ }
+
+ dma_fence_put(fences[i]);
+ continue;
+ }
+
+ fence_drv = userq_fence->fence_drv;
+ /*
+ * We need to make sure the user queue release their reference
+ * to the fence drivers at some point before queue destruction.
+ * Otherwise, we would gather those references until we don't
+ * have any more space left and crash.
+ */
+ r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
+ xa_limit_32b, GFP_KERNEL);
+ if (r)
+ goto free_fences;
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+
+ /* Store drm syncobj's gpu va address and value */
+ fence_info[cnt].va = fence_drv->va;
+ fence_info[cnt].value = fences[i]->seqno;
+
+ dma_fence_put(fences[i]);
+ /* Increment the actual userq fence count */
+ cnt++;
+ }
+
+ wait_info->num_fences = cnt;
+ /* Copy userq fence info to user space */
+ if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
+ fence_info, wait_info->num_fences * sizeof(*fence_info))) {
+ r = -EFAULT;
+ goto free_fences;
+ }
+
+ kfree(fences);
+ kfree(fence_info);
+ }
+
+ drm_exec_fini(&exec);
+ for (i = 0; i < num_read_bo_handles; i++)
+ drm_gem_object_put(gobj_read[i]);
+ kfree(gobj_read);
+
+ for (i = 0; i < num_write_bo_handles; i++)
+ drm_gem_object_put(gobj_write[i]);
+ kfree(gobj_write);
+
+ kfree(timeline_points);
+ kfree(timeline_handles);
+ kfree(syncobj_handles);
+ kfree(bo_handles_write);
+ kfree(bo_handles_read);
+
+ return 0;
+
+free_fences:
+ while (num_fences-- > 0)
+ dma_fence_put(fences[num_fences]);
+ kfree(fences);
+free_fence_info:
+ kfree(fence_info);
+exec_fini:
+ drm_exec_fini(&exec);
+put_gobj_write:
+ while (wentry-- > 0)
+ drm_gem_object_put(gobj_write[wentry]);
+ kfree(gobj_write);
+put_gobj_read:
+ while (rentry-- > 0)
+ drm_gem_object_put(gobj_read[rentry]);
+ kfree(gobj_read);
+free_timeline_points:
+ kfree(timeline_points);
+free_timeline_handles:
+ kfree(timeline_handles);
+free_syncobj_handles:
+ kfree(syncobj_handles);
+free_bo_handles_write:
+ kfree(bo_handles_write);
+free_bo_handles_read:
+ kfree(bo_handles_read);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
new file mode 100644
index 000000000000..d76add2afc77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_USERQ_FENCE_H__
+#define __AMDGPU_USERQ_FENCE_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_userq.h"
+
+struct amdgpu_userq_fence {
+ struct dma_fence base;
+ /*
+ * This lock is necessary to synchronize the
+ * userqueue dma fence operations.
+ */
+ spinlock_t lock;
+ struct list_head link;
+ unsigned long fence_drv_array_count;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence_driver **fence_drv_array;
+};
+
+struct amdgpu_userq_fence_driver {
+ struct kref refcount;
+ u64 va;
+ u64 gpu_addr;
+ u64 *cpu_addr;
+ u64 context;
+ /*
+ * This lock is necesaary to synchronize the access
+ * to the fences list by the fence driver.
+ */
+ spinlock_t fence_list_lock;
+ struct list_head fences;
+ struct amdgpu_device *adev;
+ char timeline_name[TASK_COMM_LEN];
+};
+
+int amdgpu_userq_fence_slab_init(void);
+void amdgpu_userq_fence_slab_fini(void);
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_destroy(struct kref *ref);
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
new file mode 100644
index 000000000000..1e40ca3b1584
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_UTILS_H_
+#define AMDGPU_UTILS_H_
+
+/* ---------- Generic 2‑bit capability attribute encoding ----------
+ * 00 INVALID, 01 RO, 10 WO, 11 RW
+ */
+enum amdgpu_cap_attr {
+ AMDGPU_CAP_ATTR_INVALID = 0,
+ AMDGPU_CAP_ATTR_RO = 1 << 0,
+ AMDGPU_CAP_ATTR_WO = 1 << 1,
+ AMDGPU_CAP_ATTR_RW = (AMDGPU_CAP_ATTR_RO | AMDGPU_CAP_ATTR_WO),
+};
+
+#define AMDGPU_CAP_ATTR_BITS 2
+#define AMDGPU_CAP_ATTR_MAX ((1U << AMDGPU_CAP_ATTR_BITS) - 1)
+
+/* Internal helper to build helpers for a given enum NAME */
+#define DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) \
+enum { NAME##_BITMAP_BITS = NAME##_COUNT * AMDGPU_CAP_ATTR_BITS }; \
+struct NAME##_caps { \
+ DECLARE_BITMAP(bmap, NAME##_BITMAP_BITS); \
+}; \
+static inline unsigned int NAME##_ATTR_START(enum NAME##_cap_id cap) \
+{ return (unsigned int)cap * AMDGPU_CAP_ATTR_BITS; } \
+static inline void NAME##_attr_init(struct NAME##_caps *c) \
+{ if (c) bitmap_zero(c->bmap, NAME##_BITMAP_BITS); } \
+static inline int NAME##_attr_set(struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr attr) \
+{ \
+ if (!c) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ if ((unsigned int)attr > AMDGPU_CAP_ATTR_MAX) \
+ return -EINVAL; \
+ bitmap_write(c->bmap, (unsigned long)attr, \
+ NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ return 0; \
+} \
+static inline int NAME##_attr_get(const struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr *out) \
+{ \
+ unsigned long v; \
+ if (!c || !out) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ v = bitmap_read(c->bmap, NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ *out = (enum amdgpu_cap_attr)v; \
+ return 0; \
+} \
+static inline bool NAME##_cap_is_ro(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RO; } \
+static inline bool NAME##_cap_is_wo(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_WO; } \
+static inline bool NAME##_cap_is_rw(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RW; }
+
+/* Element expander for enum creation */
+#define _CAP_ENUM_ELEM(x) x,
+
+/* Public macro: declare enum + helpers from an X‑macro list */
+#define DECLARE_ATTR_CAP_CLASS(NAME, LIST_MACRO) \
+ enum NAME##_cap_id { LIST_MACRO(_CAP_ENUM_ELEM) NAME##_COUNT }; \
+ DECLARE_ATTR_CAP_CLASS_HELPERS(NAME)
+
+#endif /* AMDGPU_UTILS_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e2ed4689118a..5c38f0d30c87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -32,10 +32,12 @@
#include <linux/module.h>
#include <drm/drm.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_uvd.h"
+#include "amdgpu_cs.h"
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"
@@ -94,16 +96,16 @@
*/
struct amdgpu_uvd_cs_ctx {
struct amdgpu_cs_parser *parser;
- unsigned reg, count;
- unsigned data0, data1;
- unsigned idx;
- unsigned ib_idx;
+ unsigned int reg, count;
+ unsigned int data0, data1;
+ unsigned int idx;
+ struct amdgpu_ib *ib;
/* does the IB has a msg command */
bool has_msg_cmd;
/* minimum buffer sizes */
- unsigned *buf_sizes;
+ unsigned int *buf_sizes;
};
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -133,13 +135,58 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
+static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
+
+static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
+ uint32_t size,
+ struct amdgpu_bo **bo_ptr)
+{
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_bo *bo = NULL;
+ void *addr;
+ int r;
+
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo, NULL, &addr);
+ if (r)
+ return r;
+
+ if (adev->uvd.address_64_bit)
+ goto succ;
+
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_uvd_force_into_uvd_segment(bo);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ goto err;
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto err_pin;
+ r = amdgpu_bo_kmap(bo, &addr);
+ if (r)
+ goto err_kmap;
+succ:
+ amdgpu_bo_unreserve(bo);
+ *bo_ptr = bo;
+ return 0;
+err_kmap:
+ amdgpu_bo_unpin(bo);
+err_pin:
+err:
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+ return r;
+}
int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
{
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
- unsigned family_id;
+ unsigned int family_id;
int i, j, r;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -213,19 +260,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
return -EINVAL;
}
- r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
- if (r) {
- dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
- fw_name);
- return r;
- }
-
- r = amdgpu_ucode_validate(adev->uvd.fw);
+ r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
fw_name);
- release_firmware(adev->uvd.fw);
- adev->uvd.fw = NULL;
+ amdgpu_ucode_release(&adev->uvd.fw);
return r;
}
@@ -236,7 +275,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
if (adev->asic_type < CHIP_VEGA20) {
- unsigned version_major, version_minor;
+ unsigned int version_major, version_minor;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
@@ -259,7 +298,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if ((adev->asic_type == CHIP_POLARIS10 ||
adev->asic_type == CHIP_POLARIS11) &&
(adev->uvd.fw_version < FW_1_66_16))
- DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
+ DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too old.\n",
version_major, version_minor);
} else {
unsigned int enc_major, enc_minor, dec_minor;
@@ -284,8 +323,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if (adev->uvd.harvest_config & (1 << j))
continue;
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
- &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr,
+ &adev->uvd.inst[j].cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
@@ -301,6 +343,10 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
adev->uvd.address_64_bit = true;
+ r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo);
+ if (r)
+ return r;
+
switch (adev->asic_type) {
case CHIP_TONGA:
adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
@@ -323,9 +369,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
+ void *addr = amdgpu_bo_kptr(adev->uvd.ib_bo);
int i, j;
- cancel_delayed_work_sync(&adev->uvd.idle_work);
drm_sched_entity_destroy(&adev->uvd.entity);
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
@@ -342,7 +388,8 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
}
- release_firmware(adev->uvd.fw);
+ amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, &addr);
+ amdgpu_ucode_release(&adev->uvd.fw);
return 0;
}
@@ -351,32 +398,32 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
* amdgpu_uvd_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
+ * Initialize the entity used for handle management in the kernel driver.
*/
-int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- struct amdgpu_ring *ring;
- struct drm_gpu_scheduler *sched;
- int r;
+ if (ring == &adev->uvd.inst[0].ring) {
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ int r;
- ring = &adev->uvd.inst[0].ring;
- sched = &ring->sched;
- r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
- &sched, 1, NULL);
- if (r) {
- DRM_ERROR("Failed setting up UVD kernel entity.\n");
- return r;
+ r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up UVD kernel entity.\n");
+ return r;
+ }
}
return 0;
}
-int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev)
{
- unsigned size;
+ unsigned int size;
void *ptr;
- int i, j;
- bool in_ras_intr = amdgpu_ras_intr_triggered();
+ int i, j, idx;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -403,14 +450,23 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
if (!adev->uvd.inst[j].saved_bo)
return -ENOMEM;
- /* re-write 0 since err_event_athub will corrupt VCPU buffer */
- if (in_ras_intr)
- memset(adev->uvd.inst[j].saved_bo, 0, size);
- else
- memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ /* re-write 0 since err_event_athub will corrupt VCPU buffer */
+ if (amdgpu_ras_intr_triggered())
+ memset(adev->uvd.inst[j].saved_bo, 0, size);
+ else
+ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+
+ drm_dev_exit(idx);
+ }
}
- if (in_ras_intr)
+ return 0;
+}
+
+int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_intr_triggered())
DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
return 0;
@@ -418,9 +474,9 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
- unsigned size;
+ unsigned int size;
void *ptr;
- int i;
+ int i, idx;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
@@ -432,18 +488,24 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
ptr = adev->uvd.inst[i].cpu_addr;
if (adev->uvd.inst[i].saved_bo != NULL) {
- memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
kvfree(adev->uvd.inst[i].saved_bo);
adev->uvd.inst[i].saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
- unsigned offset;
+ unsigned int offset;
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ drm_dev_exit(idx);
+ }
size -= le32_to_cpu(hdr->ucode_size_bytes);
ptr += le32_to_cpu(hdr->ucode_size_bytes);
}
@@ -485,9 +547,12 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
{
int i;
+
for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+ if (abo->placements[i].mem_type == TTM_PL_VRAM)
+ abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
}
@@ -496,8 +561,8 @@ static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
uint32_t lo, hi;
uint64_t addr;
- lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
- hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
+ lo = amdgpu_ib_get_value(ctx->ib, ctx->data0);
+ hi = amdgpu_ib_get_value(ctx->ib, ctx->data1);
addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
return addr;
@@ -522,16 +587,17 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r;
}
if (!ctx->parser->adev->uvd.address_64_bit) {
/* check if it's a message or feedback command */
- cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
+ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
if (cmd == 0x0 || cmd == 0x3) {
/* yes, force it into VRAM */
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
amdgpu_bo_placement_from_domain(bo, domain);
}
amdgpu_uvd_force_into_uvd_segment(bo);
@@ -552,21 +618,21 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
* Peek into the decode message and calculate the necessary buffer sizes.
*/
static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
- unsigned buf_sizes[])
+ unsigned int buf_sizes[])
{
- unsigned stream_type = msg[4];
- unsigned width = msg[6];
- unsigned height = msg[7];
- unsigned dpb_size = msg[9];
- unsigned pitch = msg[28];
- unsigned level = msg[57];
+ unsigned int stream_type = msg[4];
+ unsigned int width = msg[6];
+ unsigned int height = msg[7];
+ unsigned int dpb_size = msg[9];
+ unsigned int pitch = msg[28];
+ unsigned int level = msg[57];
- unsigned width_in_mb = width / 16;
- unsigned height_in_mb = ALIGN(height / 16, 2);
- unsigned fs_in_mb = width_in_mb * height_in_mb;
+ unsigned int width_in_mb = width / 16;
+ unsigned int height_in_mb = ALIGN(height / 16, 2);
+ unsigned int fs_in_mb = width_in_mb * height_in_mb;
- unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
- unsigned min_ctx_size = ~0;
+ unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer;
+ unsigned int min_ctx_size = ~0;
image_size = width * height;
image_size += image_size / 2;
@@ -574,7 +640,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
switch (stream_type) {
case 0: /* H264 */
- switch(level) {
+ switch (level) {
case 30:
num_dpb_buffer = 8100 / fs_in_mb;
break;
@@ -652,7 +718,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
break;
case 7: /* H264 Perf */
- switch(level) {
+ switch (level) {
case 30:
num_dpb_buffer = 8100 / fs_in_mb;
break;
@@ -685,7 +751,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
/* reference picture buffer */
min_dpb_size = image_size * num_dpb_buffer;
- if (!adev->uvd.use_ctx_buf){
+ if (!adev->uvd.use_ctx_buf) {
/* macroblock context buffer */
min_dpb_size +=
width_in_mb * height_in_mb * num_dpb_buffer * 192;
@@ -748,7 +814,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
* Make sure that we don't open up to many sessions.
*/
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
- struct amdgpu_bo *bo, unsigned offset)
+ struct amdgpu_bo *bo, unsigned int offset)
{
struct amdgpu_device *adev = ctx->parser->adev;
int32_t *msg, msg_type, handle;
@@ -773,6 +839,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
handle = msg[2];
if (handle == 0) {
+ amdgpu_bo_kunmap(bo);
DRM_ERROR("Invalid UVD handle!\n");
return -EINVAL;
}
@@ -829,9 +896,9 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
default:
DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
- return -EINVAL;
}
- BUG();
+
+ amdgpu_bo_kunmap(bo);
return -EINVAL;
}
@@ -853,7 +920,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r;
}
@@ -865,16 +932,14 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
start += addr;
- amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
- lower_32_bits(start));
- amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
- upper_32_bits(start));
+ amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start));
+ amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start));
- cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
+ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
if (cmd < 0x4) {
if ((end - start) < ctx->buf_sizes[cmd]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
- (unsigned)(end - start),
+ (unsigned int)(end - start),
ctx->buf_sizes[cmd]);
return -EINVAL;
}
@@ -882,7 +947,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
} else if (cmd == 0x206) {
if ((end - start) < ctx->buf_sizes[4]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
- (unsigned)(end - start),
+ (unsigned int)(end - start),
ctx->buf_sizes[4]);
return -EINVAL;
}
@@ -893,14 +958,14 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
if (!ctx->parser->adev->uvd.address_64_bit) {
if ((start >> 28) != ((end - 1) >> 28)) {
- DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
+ DRM_ERROR("reloc %llx-%llx crossing 256MB boundary!\n",
start, end);
return -EINVAL;
}
if ((cmd == 0 || cmd == 0x3) &&
(start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
- DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
+ DRM_ERROR("msg/fb buffer %llx-%llx out of 256MB segment!\n",
start, end);
return -EINVAL;
}
@@ -930,14 +995,13 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
- struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
int i, r;
ctx->idx++;
for (i = 0; i <= ctx->count; ++i) {
- unsigned reg = ctx->reg + i;
+ unsigned int reg = ctx->reg + i;
- if (ctx->idx >= ib->length_dw) {
+ if (ctx->idx >= ctx->ib->length_dw) {
DRM_ERROR("Register command after end of CS!\n");
return -EINVAL;
}
@@ -977,12 +1041,12 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
- struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
int r;
- for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
- uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
- unsigned type = CP_PACKET_GET_TYPE(cmd);
+ for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
+ uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
+ unsigned int type = CP_PACKET_GET_TYPE(cmd);
+
switch (type) {
case PACKET_TYPE0:
ctx->reg = CP_PACKET0_GET_REG(cmd);
@@ -1006,25 +1070,26 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
* amdgpu_uvd_ring_parse_cs - UVD command submission parser
*
* @parser: Command submission parser context
- * @ib_idx: Which indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
*
* Parse the command stream, patch in addresses as necessary.
*/
-int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
struct amdgpu_uvd_cs_ctx ctx = {};
- unsigned buf_sizes[] = {
+ unsigned int buf_sizes[] = {
[0x00000000] = 2048,
[0x00000001] = 0xFFFFFFFF,
[0x00000002] = 0xFFFFFFFF,
[0x00000003] = 2048,
[0x00000004] = 0xFFFFFFFF,
};
- struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
int r;
- parser->job->vm = NULL;
- ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ job->vm = NULL;
if (ib->length_dw % 16) {
DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
@@ -1034,7 +1099,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
ctx.parser = parser;
ctx.buf_sizes = buf_sizes;
- ctx.ib_idx = ib_idx;
+ ctx.ib = ib;
/* first round only required on chips without UVD 64 bit address support */
if (!parser->adev->uvd.address_64_bit) {
@@ -1062,43 +1127,29 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
{
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
+ uint32_t offset, data[4];
struct amdgpu_job *job;
struct amdgpu_ib *ib;
- uint32_t data[4];
uint64_t addr;
- long r;
- int i;
- unsigned offset_idx = 0;
- unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
-
- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unpin(bo);
-
- if (!ring->adev->uvd.address_64_bit) {
- struct ttm_operation_ctx ctx = { true, false };
-
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_uvd_force_into_uvd_segment(bo);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- goto err;
- }
+ int i, r;
- r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 64, direct ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
- goto err;
+ return r;
- if (adev->asic_type >= CHIP_VEGA10) {
- offset_idx = 1 + ring->me;
- offset[1] = adev->reg_offset[UVD_HWIP][0][1];
- offset[2] = adev->reg_offset[UVD_HWIP][1][1];
- }
+ if (adev->asic_type >= CHIP_VEGA10)
+ offset = adev->reg_offset[UVD_HWIP][ring->me][1];
+ else
+ offset = UVD_BASE_SI;
- data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
- data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
- data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
- data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
+ data[0] = PACKET0(offset + UVD_GPCOM_VCPU_DATA0, 0);
+ data[1] = PACKET0(offset + UVD_GPCOM_VCPU_DATA1, 0);
+ data[2] = PACKET0(offset + UVD_GPCOM_VCPU_CMD, 0);
+ data[3] = PACKET0(offset + UVD_NO_OP, 0);
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
@@ -1115,33 +1166,22 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
ib->length_dw = 16;
if (direct) {
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
- true, false,
- msecs_to_jiffies(10));
- if (r == 0)
- r = -ETIMEDOUT;
- if (r < 0)
- goto err_free;
-
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
} else {
- r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
- AMDGPU_SYNC_ALWAYS,
- AMDGPU_FENCE_OWNER_UNDEFINED);
+ r = drm_sched_job_add_resv_dependencies(&job->base,
+ bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL);
if (r)
goto err_free;
- r = amdgpu_job_submit(job, &adev->uvd.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err_free;
+ f = amdgpu_job_submit(job);
}
+ amdgpu_bo_reserve(bo, true);
amdgpu_bo_fence(bo, f, false);
amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
if (fence)
*fence = dma_fence_get(f);
@@ -1151,30 +1191,22 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
err_free:
amdgpu_job_free(job);
-
-err:
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
return r;
}
/* multiple fence commands without any stream commands in between can
- crash the vcpu so just try to emmit a dummy create/destroy msg to
- avoid this */
+ * crash the vcpu so just try to emmit a dummy create/destroy msg to
+ * avoid this
+ */
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo *bo = adev->uvd.ib_bo;
uint32_t *msg;
- int r, i;
-
- r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &bo, NULL, (void **)&msg);
- if (r)
- return r;
+ int i;
+ msg = amdgpu_bo_kptr(bo);
/* stitch together an UVD create msg */
msg[0] = cpu_to_le32(0x00000de4);
msg[1] = cpu_to_le32(0x00000000);
@@ -1191,6 +1223,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
msg[i] = cpu_to_le32(0x0);
return amdgpu_uvd_send_msg(ring, bo, true, fence);
+
}
int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
@@ -1201,12 +1234,15 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
uint32_t *msg;
int r, i;
- r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &bo, NULL, (void **)&msg);
- if (r)
- return r;
+ if (direct) {
+ bo = adev->uvd.ib_bo;
+ } else {
+ r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo);
+ if (r)
+ return r;
+ }
+ msg = amdgpu_bo_kptr(bo);
/* stitch together an UVD destroy msg */
msg[0] = cpu_to_le32(0x00000de4);
msg[1] = cpu_to_le32(0x00000002);
@@ -1215,22 +1251,26 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = 4; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
- return amdgpu_uvd_send_msg(ring, bo, direct, fence);
+ r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
+
+ if (!direct)
+ amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+
+ return r;
}
static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, uvd.idle_work.work);
- unsigned fences = 0, i, j;
+ unsigned int fences = 0, i, j;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
if (adev->uvd.harvest_config & (1 << i))
continue;
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
- for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
+ for (j = 0; j < adev->uvd.num_enc_rings; ++j)
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
- }
}
if (fences == 0) {
@@ -1290,10 +1330,17 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *fence;
long r;
- r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
+ r = amdgpu_uvd_get_create_msg(ring, 1, &fence);
if (r)
goto error;
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ dma_fence_put(fence);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ if (r < 0)
+ goto error;
+
r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r)
goto error;
@@ -1319,7 +1366,7 @@ error:
*/
uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
{
- unsigned i;
+ unsigned int i;
uint32_t used_handles = 0;
for (i = 0; i < adev->uvd.max_handles; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index edbb8194ee81..9dfad2f48ef4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -68,11 +68,13 @@ struct amdgpu_uvd {
/* store image width to adjust nb memory state */
unsigned decode_image_width;
uint32_t keyselect;
+ struct amdgpu_bo *ib_bo;
};
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
-int amdgpu_uvd_entity_init(struct amdgpu_device *adev);
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev);
int amdgpu_uvd_suspend(struct amdgpu_device *adev);
int amdgpu_uvd_resume(struct amdgpu_device *adev);
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
@@ -81,7 +83,9 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence);
void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
struct drm_file *filp);
-int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index ea6a62f67e38..a7d8f1ce6ac2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -29,16 +29,21 @@
#include <linux/module.h>
#include <drm/drm.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
+#include "amdgpu_cs.h"
#include "cikd.h"
/* 1 second timeout */
#define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000)
/* Firmware Names */
+#ifdef CONFIG_DRM_AMDGPU_SI
+#define FIRMWARE_VCE_V1_0 "amdgpu/vce_1_0_0.bin"
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
@@ -59,6 +64,9 @@
#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_FIRMWARE(FIRMWARE_VCE_V1_0);
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
@@ -81,96 +89,98 @@ MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct amdgpu_bo *bo,
struct dma_fence **fence);
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence);
/**
- * amdgpu_vce_init - allocate memory, load vce firmware
+ * amdgpu_vce_firmware_name() - determine the firmware file name for VCE
*
* @adev: amdgpu_device pointer
- * @size: size for the new BO
*
- * First step to get VCE online, allocate memory and load the firmware
+ * Each chip that has VCE IP may need a different firmware.
+ * This function returns the name of the VCE firmware file
+ * appropriate for the current chip.
*/
-int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+static const char *amdgpu_vce_firmware_name(struct amdgpu_device *adev)
{
- const char *fw_name;
- const struct common_firmware_header *hdr;
- unsigned ucode_version, version_major, version_minor, binary_id;
- int i, r;
-
switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_PITCAIRN:
+ case CHIP_TAHITI:
+ case CHIP_VERDE:
+ return FIRMWARE_VCE_V1_0;
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
- fw_name = FIRMWARE_BONAIRE;
- break;
+ return FIRMWARE_BONAIRE;
case CHIP_KAVERI:
- fw_name = FIRMWARE_KAVERI;
- break;
+ return FIRMWARE_KAVERI;
case CHIP_KABINI:
- fw_name = FIRMWARE_KABINI;
- break;
+ return FIRMWARE_KABINI;
case CHIP_HAWAII:
- fw_name = FIRMWARE_HAWAII;
- break;
+ return FIRMWARE_HAWAII;
case CHIP_MULLINS:
- fw_name = FIRMWARE_MULLINS;
- break;
+ return FIRMWARE_MULLINS;
#endif
case CHIP_TONGA:
- fw_name = FIRMWARE_TONGA;
- break;
+ return FIRMWARE_TONGA;
case CHIP_CARRIZO:
- fw_name = FIRMWARE_CARRIZO;
- break;
+ return FIRMWARE_CARRIZO;
case CHIP_FIJI:
- fw_name = FIRMWARE_FIJI;
- break;
+ return FIRMWARE_FIJI;
case CHIP_STONEY:
- fw_name = FIRMWARE_STONEY;
- break;
+ return FIRMWARE_STONEY;
case CHIP_POLARIS10:
- fw_name = FIRMWARE_POLARIS10;
- break;
+ return FIRMWARE_POLARIS10;
case CHIP_POLARIS11:
- fw_name = FIRMWARE_POLARIS11;
- break;
+ return FIRMWARE_POLARIS11;
case CHIP_POLARIS12:
- fw_name = FIRMWARE_POLARIS12;
- break;
+ return FIRMWARE_POLARIS12;
case CHIP_VEGAM:
- fw_name = FIRMWARE_VEGAM;
- break;
+ return FIRMWARE_VEGAM;
case CHIP_VEGA10:
- fw_name = FIRMWARE_VEGA10;
- break;
+ return FIRMWARE_VEGA10;
case CHIP_VEGA12:
- fw_name = FIRMWARE_VEGA12;
- break;
+ return FIRMWARE_VEGA12;
case CHIP_VEGA20:
- fw_name = FIRMWARE_VEGA20;
- break;
+ return FIRMWARE_VEGA20;
default:
- return -EINVAL;
+ return NULL;
}
+}
- r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
- if (r) {
- dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
- fw_name);
- return r;
- }
+/**
+ * amdgpu_vce_early_init() - try to load VCE firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tries to load the VCE firmware.
+ *
+ * When not found, returns ENOENT so that the driver can
+ * still load and initialize the rest of the IP blocks.
+ * The GPU can function just fine without VCE, they will just
+ * not support video encoding.
+ */
+int amdgpu_vce_early_init(struct amdgpu_device *adev)
+{
+ const char *fw_name = amdgpu_vce_firmware_name(adev);
+ const struct common_firmware_header *hdr;
+ unsigned int ucode_version, version_major, version_minor, binary_id;
+ int r;
- r = amdgpu_ucode_validate(adev->vce.fw);
+ if (!fw_name)
+ return -ENOENT;
+
+ r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
- dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->vce.fw);
- adev->vce.fw = NULL;
- return r;
+ dev_err(adev->dev,
+ "amdgpu_vce: Firmware \"%s\" not found or failed to validate (%d)\n",
+ fw_name, r);
+
+ amdgpu_ucode_release(&adev->vce.fw);
+ return -ENOENT;
}
hdr = (const struct common_firmware_header *)adev->vce.fw->data;
@@ -179,13 +189,39 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
version_major = (ucode_version >> 20) & 0xfff;
version_minor = (ucode_version >> 8) & 0xfff;
binary_id = ucode_version & 0xff;
- DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
+ dev_info(adev->dev, "Found VCE firmware Version: %d.%d Binary ID: %d\n",
version_major, version_minor, binary_id);
adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
(binary_id << 8));
+ return 0;
+}
+
+/**
+ * amdgpu_vce_sw_init() - allocate memory for VCE BO
+ *
+ * @adev: amdgpu_device pointer
+ * @size: size for the new BO
+ *
+ * First step to get VCE online: allocate memory for VCE BO.
+ * The VCE firmware binary is copied into the VCE BO later,
+ * in amdgpu_vce_resume. The VCE executes its code from the
+ * VCE BO and also uses the space in this BO for its stack and data.
+ *
+ * Ideally this BO should be placed in VRAM for optimal performance,
+ * although technically it also runs from system RAM (albeit slowly).
+ */
+int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+{
+ int i, r;
+
+ if (!adev->vce.fw)
+ return -ENOENT;
+
r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vce.vcpu_bo,
&adev->vce.gpu_addr, &adev->vce.cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
@@ -204,7 +240,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
}
/**
- * amdgpu_vce_fini - free memory
+ * amdgpu_vce_sw_fini - free memory
*
* @adev: amdgpu_device pointer
*
@@ -212,23 +248,22 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
*/
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
- unsigned i;
+ unsigned int i;
if (adev->vce.vcpu_bo == NULL)
return 0;
- cancel_delayed_work_sync(&adev->vce.idle_work);
drm_sched_entity_destroy(&adev->vce.entity);
- amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
- (void **)&adev->vce.cpu_addr);
-
for (i = 0; i < adev->vce.num_rings; i++)
amdgpu_ring_fini(&adev->vce.ring[i]);
- release_firmware(adev->vce.fw);
+ amdgpu_ucode_release(&adev->vce.fw);
mutex_destroy(&adev->vce.idle_mutex);
+ amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
+ (void **)&adev->vce.cpu_addr);
+
return 0;
}
@@ -236,21 +271,22 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
* amdgpu_vce_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
+ * Initialize the entity used for handle management in the kernel driver.
*/
-int amdgpu_vce_entity_init(struct amdgpu_device *adev)
+int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- struct amdgpu_ring *ring;
- struct drm_gpu_scheduler *sched;
- int r;
-
- ring = &adev->vce.ring[0];
- sched = &ring->sched;
- r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
- &sched, 1, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up VCE run queue.\n");
- return r;
+ if (ring == &adev->vce.ring[0]) {
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ int r;
+
+ r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up VCE run queue.\n");
+ return r;
+ }
}
return 0;
@@ -290,35 +326,22 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
*/
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
- void *cpu_addr;
const struct common_firmware_header *hdr;
- unsigned offset;
- int r;
+ unsigned int offset;
+ int idx;
if (adev->vce.vcpu_bo == NULL)
return -EINVAL;
- r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
- if (r) {
- dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
- return r;
- }
-
- r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
- if (r) {
- amdgpu_bo_unreserve(adev->vce.vcpu_bo);
- dev_err(adev->dev, "(%d) VCE map failed\n", r);
- return r;
- }
-
hdr = (const struct common_firmware_header *)adev->vce.fw->data;
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
- adev->vce.fw->size - offset);
-
- amdgpu_bo_kunmap(adev->vce.vcpu_bo);
- amdgpu_bo_unreserve(adev->vce.vcpu_bo);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memset_io(adev->vce.cpu_addr, 0, amdgpu_bo_size(adev->vce.vcpu_bo));
+ memcpy_toio(adev->vce.cpu_addr, adev->vce.fw->data + offset,
+ adev->vce.fw->size - offset);
+ drm_dev_exit(idx);
+ }
return 0;
}
@@ -334,7 +357,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vce.idle_work.work);
- unsigned i, count = 0;
+ unsigned int i, count = 0;
for (i = 0; i < adev->vce.num_rings; i++)
count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
@@ -411,6 +434,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
struct amdgpu_ring *ring = &adev->vce.ring[0];
int i, r;
+
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
uint32_t handle = atomic_read(&adev->vce.handles[i]);
@@ -427,34 +451,61 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
/**
+ * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns how many GART pages we need before GTT for the VCE IP block.
+ * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details.
+ * For VCE2+, this is not needed so return zero.
+ */
+u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev)
+{
+ /* VCE IP block not added yet, so can't use amdgpu_ip_version */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ return 512;
+
+ return 0;
+}
+
+/**
* amdgpu_vce_get_create_msg - generate a VCE create msg
*
* @ring: ring we should submit the msg to
* @handle: VCE session handle to use
- * @bo: amdgpu object for which we query the offset
* @fence: optional fence to return
*
* Open up a stream for HW test
*/
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct amdgpu_bo *bo,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 1024;
+ const unsigned int ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
+ struct amdgpu_ib ib_msg;
struct dma_fence *f = NULL;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
- ib = &job->ibs[0];
+ memset(&ib_msg, 0, sizeof(ib_msg));
+ /* only one gpu page is needed, alloc +1 page to make addr aligned. */
+ r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ &ib_msg);
+ if (r)
+ goto err;
- addr = amdgpu_bo_gpu_offset(bo);
+ ib = &job->ibs[0];
+ /* let addr point to page boundary */
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);
/* stitch together an VCE create msg */
ib->length_dw = 0;
@@ -494,6 +545,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[i] = 0x0;
r = amdgpu_job_submit_direct(job, ring, &f);
+ amdgpu_ib_free(&ib_msg, f);
if (r)
goto err;
@@ -520,15 +572,18 @@ err:
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence)
{
- const unsigned ib_size_dw = 1024;
+ const unsigned int ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ ib_size_dw * 4,
direct ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED, &job);
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -558,8 +613,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
if (direct)
r = amdgpu_job_submit_direct(job, ring, &f);
else
- r = amdgpu_job_submit(job, &ring->adev->vce.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+ f = amdgpu_job_submit(job);
if (r)
goto err;
@@ -574,10 +628,10 @@ err:
}
/**
- * amdgpu_vce_cs_validate_bo - make sure not to cross 4GB boundary
+ * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
*
- * @p: parser context
- * @ib_idx: indirect buffer to use
+ * @p: cs parser
+ * @ib: indirect buffer to use
* @lo: address of lower dword
* @hi: address of higher dword
* @size: minimum size
@@ -585,19 +639,20 @@ err:
*
* Make sure that no BO cross a 4GB boundary.
*/
-static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
- int lo, int hi, unsigned size, int32_t index)
+static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
+ struct amdgpu_ib *ib, int lo, int hi,
+ unsigned int size, int32_t index)
{
int64_t offset = ((uint64_t)size) * ((int64_t)index);
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *mapping;
- unsigned i, fpfn, lpfn;
+ unsigned int i, fpfn, lpfn;
struct amdgpu_bo *bo;
uint64_t addr;
int r;
- addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
- ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
+ addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+ ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
if (index >= 0) {
addr += offset;
fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
@@ -609,7 +664,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
addr, lo, hi, size, index);
return r;
}
@@ -627,7 +682,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
* amdgpu_vce_cs_reloc - command submission relocation
*
* @p: parser context
- * @ib_idx: indirect buffer to use
+ * @ib: indirect buffer to use
* @lo: address of lower dword
* @hi: address of higher dword
* @size: minimum size
@@ -635,8 +690,8 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
*
* Patch relocation inside command stream with real buffer address
*/
-static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
- int lo, int hi, unsigned size, uint32_t index)
+static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
+ int lo, int hi, unsigned int size, uint32_t index)
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo;
@@ -646,20 +701,20 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
if (index == 0xffffffff)
index = 0;
- addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
- ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
+ addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+ ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
addr += ((uint64_t)size) * ((uint64_t)index);
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
addr, lo, hi, size, index);
return r;
}
if ((addr + (uint64_t)size) >
(mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
+ DRM_ERROR("BO too small for addr 0x%010llx %d %d\n",
addr, lo, hi);
return -EINVAL;
}
@@ -668,8 +723,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
addr += amdgpu_bo_gpu_offset(bo);
addr -= ((uint64_t)size) * ((uint64_t)index);
- amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
- amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
+ amdgpu_ib_set_value(ib, lo, lower_32_bits(addr));
+ amdgpu_ib_set_value(ib, hi, upper_32_bits(addr));
return 0;
}
@@ -682,12 +737,12 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
* @allocated: allocated a new handle?
*
* Validates the handle and return the found session index or -EINVAL
- * we we don't have another free session index.
+ * we don't have another free session index.
*/
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
uint32_t handle, uint32_t *allocated)
{
- unsigned i;
+ unsigned int i;
/* validate the handle */
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
@@ -715,30 +770,32 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
}
/**
- * amdgpu_vce_cs_parse - parse and validate the command stream
+ * amdgpu_vce_ring_parse_cs - parse and validate the command stream
*
* @p: parser context
- * @ib_idx: indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
*/
-int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
- unsigned fb_idx = 0, bs_idx = 0;
+ unsigned int fb_idx = 0, bs_idx = 0;
int session_idx = -1;
uint32_t destroyed = 0;
uint32_t created = 0;
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
- uint32_t *size = &tmp;
- unsigned idx;
+ uint32_t dummy = 0xffffffff;
+ uint32_t *size = &dummy;
+ unsigned int idx;
int i, r = 0;
- p->job->vm = NULL;
- ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ job->vm = NULL;
for (idx = 0; idx < ib->length_dw;) {
- uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
- uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
if ((len < 8) || (len & 3)) {
DRM_ERROR("invalid VCE command length (%d)!\n", len);
@@ -748,52 +805,52 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
switch (cmd) {
case 0x00000002: /* task info */
- fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
- bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
+ fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+ bs_idx = amdgpu_ib_get_value(ib, idx + 7);
break;
case 0x03000001: /* encode */
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
- idx + 9, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9,
+ 0, 0);
if (r)
goto out;
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
- idx + 11, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11,
+ 0, 0);
if (r)
goto out;
break;
case 0x05000001: /* context buffer */
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
- idx + 2, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 0, 0);
if (r)
goto out;
break;
case 0x05000004: /* video bitstream buffer */
- tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
+ tmp = amdgpu_ib_get_value(ib, idx + 4);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
tmp, bs_idx);
if (r)
goto out;
break;
case 0x05000005: /* feedback buffer */
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
4096, fb_idx);
if (r)
goto out;
break;
case 0x0500000d: /* MV buffer */
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
- idx + 2, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 0, 0);
if (r)
goto out;
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
- idx + 7, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7,
+ 0, 0);
if (r)
goto out;
break;
@@ -803,12 +860,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
}
for (idx = 0; idx < ib->length_dw;) {
- uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
- uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
switch (cmd) {
case 0x00000001: /* session */
- handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+ handle = amdgpu_ib_get_value(ib, idx + 2);
session_idx = amdgpu_vce_validate_handle(p, handle,
&allocated);
if (session_idx < 0) {
@@ -819,8 +876,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x00000002: /* task info */
- fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
- bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
+ fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+ bs_idx = amdgpu_ib_get_value(ib, idx + 7);
break;
case 0x01000001: /* create */
@@ -835,8 +892,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
goto out;
}
- *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
- amdgpu_get_ib_value(p, ib_idx, idx + 10) *
+ *size = amdgpu_ib_get_value(ib, idx + 8) *
+ amdgpu_ib_get_value(ib, idx + 10) *
8 * 3 / 2;
break;
@@ -865,12 +922,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x03000001: /* encode */
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9,
*size, 0);
if (r)
goto out;
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11,
*size / 3, 0);
if (r)
goto out;
@@ -881,35 +938,35 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x05000001: /* context buffer */
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
*size * 2, 0);
if (r)
goto out;
break;
case 0x05000004: /* video bitstream buffer */
- tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+ tmp = amdgpu_ib_get_value(ib, idx + 4);
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
tmp, bs_idx);
if (r)
goto out;
break;
case 0x05000005: /* feedback buffer */
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
4096, fb_idx);
if (r)
goto out;
break;
case 0x0500000d: /* MV buffer */
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
- idx + 2, *size, 0);
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3,
+ idx + 2, *size, 0);
if (r)
goto out;
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
- idx + 7, *size / 12, 0);
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 8,
+ idx + 7, *size / 12, 0);
if (r)
goto out;
break;
@@ -951,14 +1008,16 @@ out:
}
/**
- * amdgpu_vce_cs_parse_vm - parse the command stream in VM mode
+ * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
*
* @p: parser context
- * @ib_idx: indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
*/
-int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
int session_idx = -1;
uint32_t destroyed = 0;
uint32_t created = 0;
@@ -967,8 +1026,8 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
int i, r = 0, idx = 0;
while (idx < ib->length_dw) {
- uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
- uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
if ((len < 8) || (len & 3)) {
DRM_ERROR("invalid VCE command length (%d)!\n", len);
@@ -978,7 +1037,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
switch (cmd) {
case 0x00000001: /* session */
- handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+ handle = amdgpu_ib_get_value(ib, idx + 2);
session_idx = amdgpu_vce_validate_handle(p, handle,
&allocated);
if (session_idx < 0) {
@@ -1027,7 +1086,6 @@ out:
if (!r) {
/* No error, free all destroyed handle slots */
tmp = destroyed;
- amdgpu_ib_free(p->adev, ib, NULL);
} else {
/* Error during parsing, free all allocated handle slots */
tmp = allocated;
@@ -1070,7 +1128,7 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
*
*/
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags)
+ unsigned int flags)
{
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
@@ -1092,7 +1150,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t rptr;
- unsigned i;
+ unsigned int i;
int r, timeout = adev->usec_timeout;
/* skip ring test for sriov*/
@@ -1130,20 +1188,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
- struct amdgpu_bo *bo = NULL;
long r;
/* skip vce ring1/2 ib test for now, since it's not reliable */
if (ring != &ring->adev->vce.ring[0])
return 0;
- r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
+ r = amdgpu_vce_get_create_msg(ring, 1, NULL);
if (r)
goto error;
@@ -1159,7 +1210,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_free_kernel(&bo, NULL, NULL);
return r;
}
+
+enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
+{
+ switch (ring) {
+ case 0:
+ return AMDGPU_RING_PRIO_0;
+ case 1:
+ return AMDGPU_RING_PRIO_1;
+ case 2:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index d6d83a3ec803..1c3464ce5037 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -51,16 +51,22 @@ struct amdgpu_vce {
struct drm_sched_entity entity;
uint32_t srbm_soft_reset;
unsigned num_rings;
+ uint32_t keyselect;
};
+int amdgpu_vce_early_init(struct amdgpu_device *adev);
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
-int amdgpu_vce_entity_init(struct amdgpu_device *adev);
+int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
-int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev);
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
struct amdgpu_ib *ib, uint32_t flags);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
@@ -71,5 +77,6 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring);
unsigned amdgpu_vce_ring_get_emit_ib_size(struct amdgpu_ring *ring);
unsigned amdgpu_vce_ring_get_dma_frame_size(struct amdgpu_ring *ring);
+enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 99b82f3c2617..5e0786ea911b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,7 +26,10 @@
#include <linux/firmware.h>
#include <linux/module.h>
+#include <linux/dmi.h>
#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
@@ -34,19 +37,32 @@
#include "soc15d.h"
/* Firmware Names */
-#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
-#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
-#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
-#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin"
-#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin"
-#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
-#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
-#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
-#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
-#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
-#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
-#define FIRMWARE_VANGOGH "amdgpu/vangogh_vcn.bin"
+#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
+#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin"
+#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin"
+#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
+#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
+#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
+#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
+#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
+#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
+#define FIRMWARE_VANGOGH "amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH "amdgpu/dimgrey_cavefish_vcn.bin"
+#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
+#define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin"
+#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin"
+#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
+#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
+#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
+#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin"
+#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
+#define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin"
+#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin"
+#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
+#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"
+#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@@ -54,6 +70,7 @@ MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
+MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
@@ -61,109 +78,95 @@ MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
+MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
+MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
+MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
+static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev);
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
+{
+ char ucode_prefix[25];
+ int r;
+
+ adev->vcn.inst[i].adev = adev;
+ adev->vcn.inst[i].inst = i;
+ amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ if (i != 0 && adev->vcn.per_inst_fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_%d.bin", ucode_prefix, i);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ if (!adev->vcn.inst[0].fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ } else {
+ r = 0;
+ }
+ adev->vcn.inst[i].fw = adev->vcn.inst[0].fw;
+ }
+
+ return r;
+}
+
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
{
unsigned long bo_size;
- const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
- int i, r;
-
- INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
- mutex_init(&adev->vcn.vcn_pg_lock);
- mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
- atomic_set(&adev->vcn.total_submission_cnt, 0);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++)
- atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
-
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- fw_name = FIRMWARE_RAVEN2;
- else if (adev->apu_flags & AMD_APU_IS_PICASSO)
- fw_name = FIRMWARE_PICASSO;
- else
- fw_name = FIRMWARE_RAVEN;
- break;
- case CHIP_ARCTURUS:
- fw_name = FIRMWARE_ARCTURUS;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case CHIP_RENOIR:
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- fw_name = FIRMWARE_RENOIR;
- else
- fw_name = FIRMWARE_GREEN_SARDINE;
-
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case CHIP_NAVI10:
- fw_name = FIRMWARE_NAVI10;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case CHIP_NAVI14:
- fw_name = FIRMWARE_NAVI14;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case CHIP_NAVI12:
- fw_name = FIRMWARE_NAVI12;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case CHIP_SIENNA_CICHLID:
- fw_name = FIRMWARE_SIENNA_CICHLID;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case CHIP_NAVY_FLOUNDER:
- fw_name = FIRMWARE_NAVY_FLOUNDER;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case CHIP_VANGOGH:
- fw_name = FIRMWARE_VANGOGH;
- break;
- case CHIP_DIMGREY_CAVEFISH:
- fw_name = FIRMWARE_DIMGREY_CAVEFISH;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- default:
- return -EINVAL;
- }
+ unsigned int fw_shared_size, log_offset;
+ int r;
- r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
- if (r) {
- dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
- fw_name);
- return r;
+ mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
+ mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_init(&adev->vcn.inst[i].engine_reset_mutex);
+ atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
+ INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.inst[i].indirect_sram = true;
+
+ /*
+ * Some Steam Deck's BIOS versions are incompatible with the
+ * indirect SRAM mode, leading to amdgpu being unable to get
+ * properly probed (and even potentially crashing the kernel).
+ * Hence, check for these versions here - notice this is
+ * restricted to Vangogh (Deck's APU).
+ */
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 0, 2)) {
+ const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+ if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
+ !strncmp("F7A0114", bios_ver, 7))) {
+ adev->vcn.inst[i].indirect_sram = false;
+ dev_info(adev->dev,
+ "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+ }
}
- r = amdgpu_ucode_validate(adev->vcn.fw);
- if (r) {
- dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->vcn.fw);
- adev->vcn.fw = NULL;
- return r;
- }
+ /* from vcn4 and above, only unified queue is used */
+ adev->vcn.inst[i].using_unified_queue =
+ amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version);
adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
/* Bit 20-23, it is encode major and non-zero for new naming convention.
@@ -181,255 +184,378 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
enc_major = fw_check;
dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
- DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
- enc_major, enc_minor, dec_ver, vep, fw_rev);
+ dev_info(adev->dev,
+ "[VCN instance %d] Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
+ i, enc_major, enc_minor, dec_ver, vep, fw_rev);
} else {
unsigned int version_major, version_minor, family_id;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
- version_major, version_minor, family_id);
+ dev_info(adev->dev, "[VCN instance %d] Found VCN firmware Version: %u.%u Family ID: %u\n",
+ i, version_major, version_minor, family_id);
}
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
- bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log);
+ } else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
+ } else {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
+ log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
+ }
- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
- &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
+ bo_size += fw_shared_size;
+
+ if (amdgpu_vcnfw_log)
+ bo_size += AMDGPU_VCNFW_LOG_SIZE;
+
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ &adev->vcn.inst[i].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ return r;
+ }
+
+ adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
+ bo_size - fw_shared_size;
+ adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
+ bo_size - fw_shared_size;
+
+ adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
+
+ if (amdgpu_vcnfw_log) {
+ adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.log_offset = log_offset;
+ }
+
+ if (adev->vcn.inst[i].indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ &adev->vcn.inst[i].dpg_sram_cpu_addr);
if (r) {
- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
return r;
}
-
- adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
- bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
- adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
- bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
-
- if (adev->vcn.indirect_sram) {
- r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
- &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
- if (r) {
- dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
- return r;
- }
- }
}
return 0;
}
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
{
- int i, j;
+ int j;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ amdgpu_bo_free_kernel(
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr);
- if (adev->vcn.indirect_sram) {
- amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
- &adev->vcn.inst[j].dpg_sram_gpu_addr,
- (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
- }
- kvfree(adev->vcn.inst[j].saved_bo);
+ kvfree(adev->vcn.inst[i].saved_bo);
+
+ amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ (void **)&adev->vcn.inst[i].cpu_addr);
- amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
- &adev->vcn.inst[j].gpu_addr,
- (void **)&adev->vcn.inst[j].cpu_addr);
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec);
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
+ if (adev->vcn.per_inst_fw) {
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ adev->vcn.inst[i].fw = NULL;
}
- release_firmware(adev->vcn.fw);
- mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
- mutex_destroy(&adev->vcn.vcn_pg_lock);
+ if (adev->vcn.reg_list)
+ amdgpu_vcn_reg_dump_fini(adev);
- return 0;
+ mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
}
-int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
- unsigned size;
- void *ptr;
- int i;
+ bool ret = false;
+ int vcn_config = adev->vcn.inst[vcn_instance].vcn_config;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
+ ret = true;
+ else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK))
+ ret = true;
+ else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK))
+ ret = true;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return 0;
+ return ret;
+}
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i)
+{
+ unsigned int size;
+ void *ptr;
+ int idx;
- adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
- if (!adev->vcn.inst[i].saved_bo)
- return -ENOMEM;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return 0;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vcn.inst[i].saved_bo)
+ return -ENOMEM;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+ drm_dev_exit(idx);
}
+
return 0;
}
-int amdgpu_vcn_resume(struct amdgpu_device *adev)
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
{
- unsigned size;
- void *ptr;
- int i;
+ int ret, i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return -EINVAL;
+ ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)
+{
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
- if (adev->vcn.inst[i].saved_bo != NULL) {
- memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
- kvfree(adev->vcn.inst[i].saved_bo);
- adev->vcn.inst[i].saved_bo = NULL;
- } else {
- const struct common_firmware_header *hdr;
- unsigned offset;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to
+ * restore fw data and clear buffer in amdgpu_vcn_resume() */
+ if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset)
+ return 0;
+
+ return amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+}
+
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i)
+{
+ unsigned int size;
+ void *ptr;
+ int idx;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
+
+ if (adev->vcn.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->vcn.inst[i].saved_bo);
+ adev->vcn.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(adev->vcn.inst[i].cpu_addr,
+ adev->vcn.inst[i].fw->data + offset,
le32_to_cpu(hdr->ucode_size_bytes));
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ drm_dev_exit(idx);
}
- memset_io(ptr, 0, size);
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
}
+ memset_io(ptr, 0, size);
}
+
return 0;
}
-static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+void amdgpu_vcn_get_profile(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
- unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
- unsigned int i, j;
- int r = 0;
+ int r;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+
+ if (adev->vcn.workload_profile_active) {
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+ return;
+ }
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ true);
+ if (r)
+ dev_warn(adev->dev,
+ "(%d) failed to enable video power profile mode\n", r);
+ else
+ adev->vcn.workload_profile_active = true;
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+}
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
+void amdgpu_vcn_put_profile(struct amdgpu_device *adev)
+{
+ bool pg = true;
+ int r, i;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.inst[i].cur_state != AMD_PG_STATE_GATE) {
+ pg = false;
+ break;
}
+ }
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- struct dpg_pause_state new_state;
+ if (pg) {
+ r = amdgpu_dpm_switch_power_profile(
+ adev, PP_SMC_POWER_PROFILE_VIDEO, false);
+ if (r)
+ dev_warn(
+ adev->dev,
+ "(%d) failed to disable video power profile mode\n",
+ r);
+ else
+ adev->vcn.workload_profile_active = false;
+ }
- if (fence[j] ||
- unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+}
- adev->vcn.pause_dpg_mode(adev, j, &new_state);
- }
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i = vcn_inst->inst, j;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
- fences += fence[j];
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !adev->vcn.inst[i].using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (fence[i] ||
+ unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state);
}
- if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
- amdgpu_gfx_off_ctrl(adev, true);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_GATE);
- r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- false);
- if (r)
- dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
+ fences += fence[i];
+
+ if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
+ amdgpu_vcn_put_profile(adev);
+
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
}
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- int r = 0;
+ struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me];
- atomic_inc(&adev->vcn.total_submission_cnt);
+ atomic_inc(&vcn_inst->total_submission_cnt);
- if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
- amdgpu_gfx_off_ctrl(adev, false);
- r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- true);
- if (r)
- dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
- }
+ cancel_delayed_work_sync(&vcn_inst->idle_work);
- mutex_lock(&adev->vcn.vcn_pg_lock);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !vcn_inst->using_unified_queue) {
struct dpg_pause_state new_state;
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
- atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+ atomic_inc(&vcn_inst->dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
} else {
unsigned int fences = 0;
unsigned int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+ for (i = 0; i < vcn_inst->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]);
- if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
}
- adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
+ vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
}
- mutex_unlock(&adev->vcn.vcn_pg_lock);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
+ amdgpu_vcn_get_profile(adev);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
+ !adev->vcn.inst[ring->me].using_unified_queue)
atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
- atomic_dec(&ring->adev->vcn.total_submission_cnt);
+ atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
+ VCN_IDLE_TIMEOUT);
}
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
- unsigned i;
+ unsigned int i;
int r;
/* VCN in SRIOV does not support direct register read/write */
@@ -440,7 +566,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -488,33 +614,31 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
}
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
- struct amdgpu_bo *bo,
+ struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
+ u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
- uint64_t addr;
- void *msg = NULL;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, 64,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ 64, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
goto err;
ib = &job->ibs[0];
- addr = amdgpu_bo_gpu_offset(bo);
- msg = amdgpu_bo_kptr(bo);
- ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
+ ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0);
ib->ptr[1] = addr;
- ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
+ ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0);
ib->ptr[3] = addr >> 32;
- ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
+ ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0);
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
- ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
+ ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0);
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
@@ -523,9 +647,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
- amdgpu_bo_fence(bo, f, false);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -535,27 +657,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
-
err:
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct amdgpu_bo **bo)
+ struct amdgpu_ib *ib)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *msg;
int r, i;
- *bo = NULL;
- r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- bo, NULL, (void **)&msg);
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ ib);
if (r)
return r;
+ msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
msg[0] = cpu_to_le32(0x00000028);
msg[1] = cpu_to_le32(0x00000038);
msg[2] = cpu_to_le32(0x00000001);
@@ -577,19 +698,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
}
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct amdgpu_bo **bo)
+ struct amdgpu_ib *ib)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *msg;
int r, i;
- *bo = NULL;
- r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- bo, NULL, (void **)&msg);
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ ib);
if (r)
return r;
+ msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
msg[0] = cpu_to_le32(0x00000028);
msg[1] = cpu_to_le32(0x00000018);
msg[2] = cpu_to_le32(0x00000000);
@@ -605,21 +727,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
- struct amdgpu_bo *bo;
+ struct amdgpu_ib ib;
long r;
- r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
+ r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
if (r)
goto error;
- r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
+ r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
if (r)
goto error;
- r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
+ r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
if (r)
goto error;
- r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
+ r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
if (r)
goto error;
@@ -634,28 +756,70 @@ error:
return r;
}
+static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
+ uint32_t ib_pack_in_dw, bool enc)
+{
+ uint32_t *ib_checksum;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
+ ib->ptr[ib->length_dw++] = 0x30000002;
+ ib_checksum = &ib->ptr[ib->length_dw++];
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
+ ib->ptr[ib->length_dw++] = 0x30000001;
+ ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);
+
+ return ib_checksum;
+}
+
+static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
+ uint32_t ib_pack_in_dw)
+{
+ uint32_t i;
+ uint32_t checksum = 0;
+
+ for (i = 0; i < ib_pack_in_dw; i++)
+ checksum += *(*ib_checksum + 2 + i);
+
+ **ib_checksum = checksum;
+}
+
static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
- struct amdgpu_bo *bo,
- struct dma_fence **fence)
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
{
struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
- const unsigned int ib_size_dw = 64;
+ unsigned int ib_size_dw = 64;
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
- uint64_t addr;
+ uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+ uint32_t *ib_checksum;
+ uint32_t ib_pack_in_dw;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
goto err;
ib = &job->ibs[0];
- addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
+ /* single queue headers */
+ if (adev->vcn.inst[ring->me].using_unified_queue) {
+ ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ + 4 + 2; /* engine info + decoding ib in dw */
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
+ }
+
ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
@@ -669,13 +833,14 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
- amdgpu_bo_fence(bo, f, false);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -685,31 +850,29 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
-
err:
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
- struct amdgpu_bo *bo;
+ struct amdgpu_ib ib;
long r;
- r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
+ r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
if (r)
goto error;
- r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
+ r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
if (r)
goto error;
- r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
+ r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
if (r)
goto error;
- r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
+ r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
if (r)
goto error;
@@ -728,7 +891,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t rptr;
- unsigned i;
+ unsigned int i;
int r;
if (amdgpu_sriov_vf(adev))
@@ -756,31 +919,41 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
}
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct amdgpu_bo *bo,
+ struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 16;
+ unsigned int ib_size_dw = 16;
+ struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
ib = &job->ibs[0];
- addr = amdgpu_bo_gpu_offset(bo);
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
ib->length_dw = 0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = addr;
- ib->ptr[ib->length_dw++] = 0x0000000b;
+ ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -794,6 +967,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
@@ -810,31 +986,41 @@ err:
}
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct amdgpu_bo *bo,
+ struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 16;
+ unsigned int ib_size_dw = 16;
+ struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
ib = &job->ibs[0];
- addr = amdgpu_bo_gpu_offset(bo);
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
ib->length_dw = 0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = addr;
- ib->ptr[ib->length_dw++] = 0x0000000b;
+ ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002;
@@ -848,6 +1034,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
@@ -865,21 +1054,23 @@ err:
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
+ struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence = NULL;
- struct amdgpu_bo *bo = NULL;
+ struct amdgpu_ib ib;
long r;
- r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &bo, NULL, NULL);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, (128 << 10) + AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_IB_POOL_DIRECT,
+ &ib);
if (r)
return r;
- r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
+ r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
if (r)
goto error;
- r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
+ r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
if (r)
goto error;
@@ -890,9 +1081,557 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = 0;
error:
+ amdgpu_ib_free(&ib, fence);
dma_fence_put(fence);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_free_kernel(&bo, NULL, NULL);
return r;
}
+
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ long r;
+
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) &&
+ (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) {
+ r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
+ if (r)
+ goto error;
+ }
+
+ r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
+
+error:
+ return r;
+}
+
+enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
+{
+ switch (ring) {
+ case 0:
+ return AMDGPU_RING_PRIO_0;
+ case 1:
+ return AMDGPU_RING_PRIO_1;
+ case 2:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
+
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i)
+{
+ unsigned int idx;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1))
+ && (i > 0))
+ return;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ /* currently only support 2 FW instances */
+ if (i >= 2) {
+ dev_info(adev->dev, "More then 2 VCN FW instances!\n");
+ return;
+ }
+ idx = AMDGPU_UCODE_ID_VCN + i;
+ adev->firmware.ucode[idx].ucode_id = idx;
+ adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+}
+
+/*
+ * debugfs for mapping vcn firmware log buffer.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_vcn_inst *vcn;
+ void *log_buf;
+ struct amdgpu_vcn_fwlog *plog;
+ unsigned int read_pos, write_pos, available, i, read_bytes = 0;
+ unsigned int read_num[2] = {0};
+
+ vcn = file_inode(f)->i_private;
+ if (!vcn)
+ return -ENODEV;
+
+ if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
+ return -EFAULT;
+
+ log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
+
+ plog = (struct amdgpu_vcn_fwlog *)log_buf;
+ read_pos = plog->rptr;
+ write_pos = plog->wptr;
+
+ if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
+ return -EFAULT;
+
+ if (!size || (read_pos == write_pos))
+ return 0;
+
+ if (write_pos > read_pos) {
+ available = write_pos - read_pos;
+ read_num[0] = min_t(size_t, size, available);
+ } else {
+ read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
+ available = read_num[0] + write_pos - plog->header_size;
+ if (size > available)
+ read_num[1] = write_pos - plog->header_size;
+ else if (size > read_num[0])
+ read_num[1] = size - read_num[0];
+ else
+ read_num[0] = size;
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (read_num[i]) {
+ if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
+ read_pos = plog->header_size;
+ if (read_num[i] == copy_to_user((buf + read_bytes),
+ (log_buf + read_pos), read_num[i]))
+ return -EFAULT;
+
+ read_bytes += read_num[i];
+ read_pos += read_num[i];
+ }
+ }
+
+ plog->rptr = read_pos;
+ *pos += read_bytes;
+ return read_bytes;
+}
+
+static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_vcn_fwlog_read,
+ .llseek = default_llseek
+};
+#endif
+
+void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
+ struct amdgpu_vcn_inst *vcn)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ sprintf(name, "amdgpu_vcn_%d_fwlog", i);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, vcn,
+ &amdgpu_debugfs_vcnfwlog_fops,
+ AMDGPU_VCNFW_LOG_SIZE);
+#endif
+}
+
+void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
+{
+#if defined(CONFIG_DEBUG_FS)
+ uint32_t *flag = vcn->fw_shared.cpu_addr;
+ void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
+ uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
+ struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
+ struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ + vcn->fw_shared.log_offset;
+ *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
+ fw_log->is_enabled = 1;
+ fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
+ fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32);
+ fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE);
+
+ log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog);
+ log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE;
+ log_buf->rptr = log_buf->header_size;
+ log_buf->wptr = log_buf->header_size;
+ log_buf->wrapped = 0;
+#endif
+}
+
+int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->vcn.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev, ras_if->block);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV for VCN!\n");
+ }
+
+ return 0;
+}
+
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i) ||
+ !adev->vcn.inst[i].ras_poison_irq.funcs)
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_vcn_ras *ras;
+
+ if (!adev->vcn.ras)
+ return 0;
+
+ ras = adev->vcn.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register vcn ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "vcn");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->vcn.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init;
+
+ return 0;
+}
+
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = (ucode_id ? ucode_id :
+ (inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
+ AMDGPU_UCODE_ID_VCN0_RAM)),
+ .mc_addr = adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+ .ucode_size = ((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr),
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset);
+}
+
+static DEVICE_ATTR(vcn_reset_mask, 0444,
+ amdgpu_get_vcn_reset_mask, NULL);
+
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->vcn.num_vcn_inst) {
+ r = device_create_file(adev->dev, &dev_attr_vcn_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->vcn.num_vcn_inst)
+ device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
+ }
+}
+
+/*
+ * debugfs to enable/disable vcn job submission to specific core or
+ * instance. It is created only if the queue type is unified.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->vcn.num_vcn_inst) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (val & (1ULL << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops,
+ amdgpu_debugfs_vcn_sched_mask_get,
+ amdgpu_debugfs_vcn_sched_mask_set, "%llx\n");
+#endif
+
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue)
+ return;
+ sprintf(name, "amdgpu_vcn_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_vcn_sched_mask_fops);
+#endif
+}
+
+/**
+ * vcn_set_powergating_state - set VCN block powergating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ ret |= vinst->set_pg_state(vinst, state);
+ }
+
+ return ret;
+}
+
+/**
+ * amdgpu_vcn_reset_engine - Reset a specific VCN engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: VCN engine instance to reset
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev,
+ uint32_t instance_id)
+{
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id];
+ int r, i;
+
+ mutex_lock(&vinst->engine_reset_mutex);
+ /* Stop the scheduler's work queue for the dec and enc rings if they are running.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ drm_sched_wqueue_stop(&vinst->ring_dec.sched);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ drm_sched_wqueue_stop(&vinst->ring_enc[i].sched);
+
+ /* Perform the VCN reset for the specified instance */
+ r = vinst->reset(vinst);
+ if (r)
+ goto unlock;
+ r = amdgpu_ring_test_ring(&vinst->ring_dec);
+ if (r)
+ goto unlock;
+ for (i = 0; i < vinst->num_enc_rings; i++) {
+ r = amdgpu_ring_test_ring(&vinst->ring_enc[i]);
+ if (r)
+ goto unlock;
+ }
+ amdgpu_fence_driver_force_completion(&vinst->ring_dec);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ amdgpu_fence_driver_force_completion(&vinst->ring_enc[i]);
+
+ /* Restart the scheduler's work queue for the dec and enc rings
+ * if they were stopped by this function. This allows new tasks
+ * to be submitted to the queues after the reset is complete.
+ */
+ drm_sched_wqueue_start(&vinst->ring_dec.sched);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ drm_sched_wqueue_start(&vinst->ring_enc[i].sched);
+
+unlock:
+ mutex_unlock(&vinst->engine_reset_mutex);
+
+ return r;
+}
+
+/**
+ * amdgpu_vcn_ring_reset - Reset a VCN ring
+ * @ring: ring to reset
+ * @vmid: vmid of guilty job
+ * @timedout_fence: fence of timed out job
+ *
+ * This helper is for VCN blocks without unified queues because
+ * resetting the engine resets all queues in that case. With
+ * unified queues we have one queue per engine.
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ return -EINVAL;
+
+ return amdgpu_vcn_reset_engine(adev, ring->me);
+}
+
+int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+ adev->vcn.ip_dump = kcalloc(adev->vcn.num_vcn_inst * count,
+ sizeof(uint32_t), GFP_KERNEL);
+ if (!adev->vcn.ip_dump)
+ return -ENOMEM;
+ adev->vcn.reg_list = reg;
+ adev->vcn.reg_count = count;
+
+ return 0;
+}
+
+static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->vcn.ip_dump);
+ adev->vcn.ip_dump = NULL;
+ adev->vcn.reg_list = NULL;
+ adev->vcn.reg_count = 0;
+}
+
+void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ u32 inst_off;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ inst_off = i * adev->vcn.reg_count;
+ /* mmUVD_POWER_STATUS is always readable and is the first in reg_list */
+ adev->vcn.ip_dump[inst_off] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[0], i));
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) !=
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+
+ if (is_powered)
+ for (j = 1; j < adev->vcn.reg_count; j++)
+ adev->vcn.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[j], i));
+ }
+}
+
+void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ u32 inst_off;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * adev->vcn.reg_count;
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) !=
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+
+ if (is_powered) {
+ drm_printf(p, "\nActive Instance:VCN%d\n", i);
+ for (j = 0; j < adev->vcn.reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", adev->vcn.reg_list[j].reg_name,
+ adev->vcn.ip_dump[inst_off + j]);
+ } else {
+ drm_printf(p, "\nInactive Instance:VCN%d\n", i);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 13aa417f6be7..82624b44e661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,14 +24,16 @@
#ifndef __AMDGPU_VCN_H__
#define __AMDGPU_VCN_H__
+#include "amdgpu_ras.h"
+
#define AMDGPU_VCN_STACK_SIZE (128*1024)
#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3
-#define AMDGPU_MAX_VCN_INSTANCES 2
-#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
+#define AMDGPU_MAX_VCN_INSTANCES 4
+#define AMDGPU_MAX_VCN_ENC_RINGS (AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES)
#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
@@ -63,10 +65,7 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
-#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
-#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
-#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_VID_IP_ADDRESS_2_0 0x0
#define VCN_AON_IP_ADDRESS_2_0 0x30000
@@ -101,7 +100,8 @@
#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
- uint32_t internal_reg_offset, addr; \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
@@ -141,26 +141,112 @@
RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
- do { \
- if (!indirect) { \
- WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
- (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
- mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
- offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
- } else { \
- *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
- *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \
- } \
+#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
} while (0)
+#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \
+ ({ \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
+ bool video_range, video1_range, aon_range, aon1_range; \
+ \
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
+ addr <<= 2; \
+ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \
+ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \
+ if (video_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
+ else \
+ internal_reg_offset = (0xFFFFF & addr); \
+ \
+ internal_reg_offset >>= 2; \
+ })
+
+#define WREG32_SOC24_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ regUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
+#define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4)
+#define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6)
#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8)
#define AMDGPU_VCN_SW_RING_FLAG (1 << 9)
+#define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10)
+#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11)
+#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14)
+#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15)
+
+#define MAX_NUM_VCN_RB_SETUP 4
#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001
#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001
+#define VCN_CODEC_DISABLE_MASK_AV1 (1 << 0)
+#define VCN_CODEC_DISABLE_MASK_VP9 (1 << 1)
+#define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2)
+#define VCN_CODEC_DISABLE_MASK_H264 (1 << 3)
+
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU (0)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_APU (1)
+
+#define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2
+
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_vcn_caps {
+ AMDGPU_VCN_RRMT_ENABLED,
+};
+
+#define AMDGPU_VCN_CAPS(caps) BIT(AMDGPU_VCN_##caps)
+
enum fw_queue_mode {
FW_QUEUE_RING_RESET = 1,
FW_QUEUE_DPG_HOLD_OFF = 2,
@@ -204,14 +290,25 @@ struct amdgpu_vcn_reg{
unsigned scratch9;
};
+struct amdgpu_vcn_fw_shared {
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ uint32_t mem_size;
+ uint32_t log_offset;
+};
+
struct amdgpu_vcn_inst {
+ struct amdgpu_device *adev;
+ int inst;
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
void *saved_bo;
struct amdgpu_ring ring_dec;
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
+ atomic_t sched_score;
struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_vcn_reg external;
struct amdgpu_bo *dpg_sram_bo;
struct dpg_pause_state pause_state;
@@ -219,28 +316,64 @@ struct amdgpu_vcn_inst {
uint64_t dpg_sram_gpu_addr;
uint32_t *dpg_sram_curr_addr;
atomic_t dpg_enc_submission_cnt;
- void *fw_shared_cpu_addr;
- uint64_t fw_shared_gpu_addr;
-};
-
-struct amdgpu_vcn {
- unsigned fw_version;
+ struct amdgpu_vcn_fw_shared fw_shared;
+ uint8_t aid_id;
+ const struct firmware *fw; /* VCN firmware */
+ uint8_t vcn_config;
+ uint32_t vcn_codec_disable_mask;
+ atomic_t total_submission_cnt;
+ struct mutex vcn_pg_lock;
+ enum amd_powergating_state cur_state;
struct delayed_work idle_work;
- const struct firmware *fw; /* VCN firmware */
+ unsigned fw_version;
unsigned num_enc_rings;
- enum amd_powergating_state cur_state;
bool indirect_sram;
+ struct amdgpu_vcn_reg internal;
+ struct mutex vcn1_jpeg1_workaround;
+ int (*pause_dpg_mode)(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+ int (*set_pg_state)(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+ int (*reset)(struct amdgpu_vcn_inst *vinst);
+ bool using_unified_queue;
+ struct mutex engine_reset_mutex;
+};
+
+struct amdgpu_vcn_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+struct amdgpu_vcn {
uint8_t num_vcn_inst;
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
- struct amdgpu_vcn_reg internal;
- struct mutex vcn_pg_lock;
- struct mutex vcn1_jpeg1_workaround;
- atomic_t total_submission_cnt;
unsigned harvest_config;
- int (*pause_dpg_mode)(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+
+ struct ras_common_if *ras_if;
+ struct amdgpu_vcn_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
+
+ /* IP reg dump */
+ uint32_t *ip_dump;
+
+ uint32_t supported_reset;
+ uint32_t caps;
+
+ bool per_inst_fw;
+ unsigned fw_version;
+
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
+};
+
+struct amdgpu_fw_shared_rb_ptrs_struct {
+ /* to WA DPG R/W ptr issues.*/
+ uint32_t rptr;
+ uint32_t wptr;
};
struct amdgpu_fw_shared_multi_queue {
@@ -256,12 +389,93 @@ struct amdgpu_fw_shared_sw_ring {
uint8_t padding[3];
};
+struct amdgpu_fw_shared_unified_queue_struct {
+ uint8_t is_enabled;
+ uint8_t queue_mode;
+ uint8_t queue_status;
+ uint8_t padding[5];
+};
+
+struct amdgpu_fw_shared_fw_logging {
+ uint8_t is_enabled;
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t size;
+};
+
+struct amdgpu_fw_shared_smu_interface_info {
+ uint8_t smu_interface_type;
+ uint8_t padding[3];
+};
+
struct amdgpu_fw_shared {
uint32_t present_flag_0;
- uint8_t pad[53];
+ uint8_t pad[44];
+ struct amdgpu_fw_shared_rb_ptrs_struct rb;
+ uint8_t pad1[1];
struct amdgpu_fw_shared_multi_queue multi_queue;
struct amdgpu_fw_shared_sw_ring sw_ring;
-} __attribute__((__packed__));
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
+};
+
+struct amdgpu_vcn_rb_setup_info {
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+};
+
+struct amdgpu_fw_shared_rb_setup {
+ uint32_t is_rb_enabled_flags;
+
+ union {
+ struct {
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+ uint32_t rb4_addr_lo;
+ uint32_t rb4_addr_hi;
+ uint32_t rb4_size;
+ uint32_t reserved[6];
+ };
+
+ struct {
+ struct amdgpu_vcn_rb_setup_info rb_info[MAX_NUM_VCN_RB_SETUP];
+ };
+ };
+};
+
+struct amdgpu_fw_shared_drm_key_wa {
+ uint8_t method;
+ uint8_t reserved[3];
+};
+
+struct amdgpu_fw_shared_queue_decouple {
+ uint8_t is_enabled;
+ uint8_t reserved[7];
+};
+
+struct amdgpu_vcn4_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[9];
+ struct amdgpu_fw_shared_queue_decouple decouple;
+};
+
+struct amdgpu_vcn_fwlog {
+ uint32_t rptr;
+ uint32_t wptr;
+ uint32_t buffer_size;
+ uint32_t header_size;
+ uint8_t wrapped;
+};
struct amdgpu_vcn_decode_buffer {
uint32_t valid_buf_flag;
@@ -270,19 +484,90 @@ struct amdgpu_vcn_decode_buffer {
uint32_t pad[30];
};
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
-int amdgpu_vcn_suspend(struct amdgpu_device *adev);
-int amdgpu_vcn_resume(struct amdgpu_device *adev);
+struct amdgpu_vcn_rb_metadata {
+ uint32_t size;
+ uint32_t present_flag_0;
+
+ uint8_t version;
+ uint8_t ring_id;
+ uint8_t pad[26];
+};
+
+struct amdgpu_vcn5_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[404];
+};
+
+#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
+#define VCN_BLOCK_DECODE_DISABLE_MASK 0x40
+#define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0
+
+enum vcn_ring_type {
+ VCN_ENCODE_RING,
+ VCN_DECODE_RING,
+ VCN_UNIFIED_RING,
+};
+
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i);
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i);
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
+bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev,
+ enum vcn_ring_type type, uint32_t vcn_instance);
+
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring);
+
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i);
+
+void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
+void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
+ uint8_t i, struct amdgpu_vcn_inst *vcn);
+
+int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
+
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id);
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev);
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev);
+
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *guilty_fence);
+int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+void amdgpu_vcn_get_profile(struct amdgpu_device *adev);
+void amdgpu_vcn_put_profile(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 5da04d45b637..47a6ce4fdc74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -23,10 +23,17 @@
#include <linux/module.h>
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
+
#include <drm/drm_drv.h>
+#include <xen/xen.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
#include "vi.h"
#include "soc15.h"
#include "nv.h"
@@ -37,6 +44,18 @@
vf2pf_info->ucode_info[ucode].version = ver; \
} while (0)
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+
+const char *amdgpu_virt_dynamic_crit_table_name[] = {
+ "IP DISCOVERY",
+ "VBIOS IMG",
+ "RAS TELEMETRY",
+ "DATA EXCHANGE",
+ "BAD PAGE INFO",
+ "INIT HEADER",
+ "LAST",
+};
+
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
/* By now all MMIO pages except mailbox are blocked */
@@ -50,58 +69,20 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
struct drm_device *ddev = adev_to_drm(adev);
/* enable virtual display */
- if (adev->mode_info.num_crtc == 0)
- adev->mode_info.num_crtc = 1;
- adev->enable_virtual_display = true;
+ if (adev->asic_type != CHIP_ALDEBARAN &&
+ adev->asic_type != CHIP_ARCTURUS &&
+ ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
+ if (adev->mode_info.num_crtc == 0)
+ adev->mode_info.num_crtc = 1;
+ adev->enable_virtual_display = true;
+ }
ddev->driver_features &= ~DRIVER_ATOMIC;
adev->cg_flags = 0;
adev->pg_flags = 0;
-}
-
-void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
- uint32_t reg0, uint32_t reg1,
- uint32_t ref, uint32_t mask)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *ring = &kiq->ring;
- signed long r, cnt = 0;
- unsigned long flags;
- uint32_t seq;
-
- spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
- amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
- ref, mask);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r)
- goto failed_undo;
-
- amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-
- /* don't wait anymore for IRQ context */
- if (r < 1 && in_interrupt())
- goto failed_kiq;
- might_sleep();
- while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
-
- msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- }
-
- if (cnt > MAX_KIQ_REG_TRY)
- goto failed_kiq;
-
- return;
-
-failed_undo:
- amdgpu_ring_undo(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-failed_kiq:
- dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+ /* Reduce kcq number to 2 to reduce latency */
+ if (amdgpu_num_kcq == -1)
+ amdgpu_num_kcq = 2;
}
/**
@@ -118,8 +99,10 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
if (virt->ops && virt->ops->req_full_gpu) {
r = virt->ops->req_full_gpu(adev, init);
- if (r)
+ if (r) {
+ adev->no_hw_access = true;
return r;
+ }
adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
}
@@ -179,9 +162,24 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
virt->ops->req_init_data(adev);
if (adev->virt.req_init_data_ver > 0)
- DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+ dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
+ adev->virt.req_init_data_ver);
else
- DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+ dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
+}
+
+/**
+ * amdgpu_virt_ready_to_reset() - send ready to reset to host
+ * @adev: amdgpu device.
+ * Send ready to reset message to GPU hypervisor to signal we have stopped GPU
+ * activity and is ready for host FLR
+ */
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->reset_gpu)
+ virt->ops->ready_to_reset(adev);
}
/**
@@ -214,17 +212,18 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
return 0;
r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->virt.mm_table.bo,
&adev->virt.mm_table.gpu_addr,
(void *)&adev->virt.mm_table.cpu_addr);
if (r) {
- DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+ dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
return r;
}
memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
- DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+ dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
adev->virt.mm_table.gpu_addr,
adev->virt.mm_table.cpu_addr);
return 0;
@@ -246,6 +245,22 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
adev->virt.mm_table.gpu_addr = 0;
}
+/**
+ * amdgpu_virt_rcvd_ras_interrupt() - receive ras interrupt
+ * @adev: amdgpu device.
+ * Check whether host sent RAS error message
+ * Return: true if found, otherwise false
+ */
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (!virt->ops || !virt->ops->rcvd_ras_intr)
+ return false;
+
+ return virt->ops->rcvd_ras_intr(adev);
+}
+
unsigned int amd_sriov_msg_checksum(void *obj,
unsigned long obj_size,
@@ -280,17 +295,15 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
*data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
if (!*data)
- return -ENOMEM;
+ goto data_failure;
- bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL);
- bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL);
+ bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL);
+ if (!bps)
+ goto bps_failure;
- if (!bps || !bps_bo) {
- kfree(bps);
- kfree(bps_bo);
- kfree(*data);
- return -ENOMEM;
- }
+ bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
+ if (!bps_bo)
+ goto bps_bo_failure;
(*data)->bps = bps;
(*data)->bps_bo = bps_bo;
@@ -300,6 +313,13 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
virt->ras_init_done = true;
return 0;
+
+bps_bo_failure:
+ kfree(bps);
+bps_failure:
+ kfree(*data);
+data_failure:
+ return -ENOMEM;
}
static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
@@ -314,8 +334,10 @@ static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
for (i = data->last_reserved - 1; i >= 0; i--) {
bo = data->bps_bo[i];
- amdgpu_bo_free_kernel(&bo, NULL, NULL);
- data->bps_bo[i] = bo;
+ if (bo) {
+ amdgpu_bo_free_kernel(&bo, NULL, NULL);
+ data->bps_bo[i] = bo;
+ }
data->last_reserved = i;
}
}
@@ -355,6 +377,8 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
struct amdgpu_bo *bo = NULL;
uint64_t bp;
int i;
@@ -370,13 +394,20 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
* 2) a ras bad page has been reserved (duplicate error injection
* for one page);
*/
- if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
- AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &bo, NULL))
- DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
-
- data->bps_bo[i] = bo;
+ if (ttm_resource_manager_used(man)) {
+ amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
+ bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE);
+ data->bps_bo[i] = NULL;
+ } else {
+ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE,
+ &bo, NULL))
+ dev_dbg(adev->dev,
+ "RAS WARN: reserve vram for retired page %llx fail\n",
+ bp);
+ data->bps_bo[i] = bo;
+ }
data->last_reserved = i + 1;
bo = NULL;
}
@@ -405,11 +436,19 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
struct eeprom_table_record bp;
uint64_t retired_page;
uint32_t bp_idx, bp_cnt;
+ void *vram_usage_va = NULL;
+
+ if (adev->mman.fw_vram_usage_va)
+ vram_usage_va = adev->mman.fw_vram_usage_va;
+ else
+ vram_usage_va = adev->mman.drv_vram_usage_va;
+
+ memset(&bp, 0, sizeof(bp));
if (bp_block_size) {
bp_cnt = bp_block_size / sizeof(uint64_t);
for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
- retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va +
+ retired_page = *(uint64_t *)(vram_usage_va +
bp_block_offset + bp_idx * sizeof(uint64_t));
bp.retired_page = retired_page;
@@ -429,11 +468,14 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
uint32_t checksum;
uint32_t checkval;
+ uint32_t i;
+ uint32_t tmp;
+
if (adev->virt.fw_reserve.p_pf2vf == NULL)
return -EINVAL;
if (pf2vf_info->size > 1024) {
- DRM_ERROR("invalid pf2vf message size\n");
+ dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size);
return -EINVAL;
}
@@ -444,7 +486,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
adev->virt.fw_reserve.checksum_key, checksum);
if (checksum != checkval) {
- DRM_ERROR("invalid pf2vf message\n");
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
return -EINVAL;
}
@@ -458,7 +502,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
0, checksum);
if (checksum != checkval) {
- DRM_ERROR("invalid pf2vf message\n");
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
return -EINVAL;
}
@@ -466,10 +512,38 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->vf2pf_update_interval_ms;
adev->virt.gim_feature =
((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->feature_flags.all;
-
+ adev->virt.reg_access =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->reg_access_flags.all;
+
+ adev->virt.decode_max_dimension_pixels = 0;
+ adev->virt.decode_max_frame_pixels = 0;
+ adev->virt.encode_max_dimension_pixels = 0;
+ adev->virt.encode_max_frame_pixels = 0;
+ adev->virt.is_mm_bw_enabled = false;
+ for (i = 0; i < AMD_SRIOV_MSG_RESERVE_VCN_INST; i++) {
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_dimension_pixels;
+ adev->virt.decode_max_dimension_pixels = max(tmp, adev->virt.decode_max_dimension_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_frame_pixels;
+ adev->virt.decode_max_frame_pixels = max(tmp, adev->virt.decode_max_frame_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_dimension_pixels;
+ adev->virt.encode_max_dimension_pixels = max(tmp, adev->virt.encode_max_dimension_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels;
+ adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels);
+ }
+ if ((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
+ adev->virt.is_mm_bw_enabled = true;
+
+ adev->unique_id =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
+ adev->virt.ras_en_caps.all = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_en_caps.all;
+ adev->virt.ras_telemetry_en_caps.all =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_telemetry_en_caps.all;
break;
default:
- DRM_ERROR("invalid pf2vf version\n");
+ dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version);
return -EINVAL;
}
@@ -500,10 +574,13 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
- POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos_fw_version);
- POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_fw_version);
- POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ta_ras_ucode_version);
- POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.ta_xgmi_ucode_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
+ adev->psp.asd_context.bin_desc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS,
+ adev->psp.ras_context.context.bin_desc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI,
+ adev->psp.xgmi_context.context.bin_desc.fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version);
@@ -514,7 +591,6 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
{
struct amd_sriov_msg_vf2pf_info *vf2pf_info;
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
@@ -537,8 +613,10 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->driver_cert = 0;
vf2pf_info->os_info.all = 0;
- vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(vram_man) >> 20;
- vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(vram_man) >> 20;
+ vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0;
+ vf2pf_info->fb_vis_usage =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20;
@@ -550,9 +628,16 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->encode_usage = 0;
vf2pf_info->decode_usage = 0;
+ vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
+ if (amdgpu_sriov_is_mes_info_enable(adev)) {
+ vf2pf_info->mes_info_addr =
+ (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE);
+ vf2pf_info->mes_info_size =
+ adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE;
+ }
vf2pf_info->checksum =
amd_sriov_msg_checksum(
- vf2pf_info, vf2pf_info->header.size, 0, 0);
+ vf2pf_info, sizeof(*vf2pf_info), 0, 0);
return 0;
}
@@ -563,18 +648,59 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
int ret;
ret = amdgpu_virt_read_pf2vf_data(adev);
- if (ret)
+ if (ret) {
+ adev->virt.vf2pf_update_retry_cnt++;
+
+ if ((amdgpu_virt_rcvd_ras_interrupt(adev) ||
+ adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
+ amdgpu_sriov_runtime(adev)) {
+
+ amdgpu_ras_set_fed(adev, true);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work))
+ return;
+ else
+ dev_err(adev->dev, "Failed to queue work! at %s", __func__);
+ }
+
goto out;
+ }
+
+ adev->virt.vf2pf_update_retry_cnt = 0;
amdgpu_virt_write_vf2pf_data(adev);
out:
schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
}
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+ uint32_t dataexchange_offset =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+ uint32_t dataexchange_size =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+ uint64_t pos = 0;
+
+ dev_info(adev->dev,
+ "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ dataexchange_offset, dataexchange_size);
+
+ if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+ dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+
+ pos = (uint64_t)dataexchange_offset;
+ amdgpu_device_vram_access(adev, pos, pfvf_data,
+ dataexchange_size, false);
+
+ return 0;
+}
+
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
{
if (adev->virt.vf2pf_update_interval_ms != 0) {
- DRM_INFO("clean up the vf2pf work item\n");
+ dev_info(adev->dev, "clean up the vf2pf work item\n");
cancel_delayed_work_sync(&adev->virt.vf2pf_work);
adev->virt.vf2pf_update_interval_ms = 0;
}
@@ -582,50 +708,118 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
- uint64_t bp_block_offset = 0;
- uint32_t bp_block_size = 0;
- struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+ uint32_t *pfvf_data = NULL;
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
+ adev->virt.vf2pf_update_retry_cnt = 0;
- if (adev->mman.fw_vram_usage_va != NULL) {
- adev->virt.vf2pf_update_interval_ms = 2000;
+ if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
+ dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
+ } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ /* go through this logic in ip_init and reset to init workqueue*/
+ amdgpu_virt_exchange_data(adev);
+
+ INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+ schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+ } else if (adev->bios != NULL) {
+ /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ pfvf_data =
+ kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+ GFP_KERNEL);
+ if (!pfvf_data) {
+ dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+ return;
+ }
+
+ if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+ goto free_pfvf_data;
+
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+
+ amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+ kfree(pfvf_data);
+ pfvf_data = NULL;
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+ amdgpu_virt_read_pf2vf_data(adev);
+ }
+ }
+}
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
- adev->virt.fw_reserve.p_vf2pf =
- (struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
+{
+ uint64_t bp_block_offset = 0;
+ uint32_t bp_block_size = 0;
+ struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+
+ if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ if (adev->mman.fw_vram_usage_va) {
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+ (AMD_SRIOV_MSG_SIZE_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+ }
+ } else if (adev->mman.drv_vram_usage_va) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+ }
amdgpu_virt_read_pf2vf_data(adev);
amdgpu_virt_write_vf2pf_data(adev);
/* bad page handling for version 2 */
if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
- pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
+ pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
- bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
- ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
- bp_block_size = pf2vf_v2->bp_block_size;
+ bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
+ ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
+ bp_block_size = pf2vf_v2->bp_block_size;
- if (bp_block_size && !adev->virt.ras_init_done)
- amdgpu_virt_init_ras_err_handler_data(adev);
+ if (bp_block_size && !adev->virt.ras_init_done)
+ amdgpu_virt_init_ras_err_handler_data(adev);
- if (adev->virt.ras_init_done)
- amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
- }
- }
-
- if (adev->virt.vf2pf_update_interval_ms != 0) {
- INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
- schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
+ if (adev->virt.ras_init_done)
+ amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+ }
}
}
-void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
{
uint32_t reg;
@@ -640,6 +834,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
+ case CHIP_IP_DISCOVERY:
reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
break;
default: /* other chip doesn't support SRIOV */
@@ -654,34 +850,296 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
if (!reg) {
- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
+ /* passthrough mode exclus sriov mod */
+ if (is_virtual_machine() && !xen_initial_domain())
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
+ return reg;
+}
+
+static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
+{
+ bool is_sriov = false;
+
/* we have the ability to check now */
if (amdgpu_sriov_vf(adev)) {
+ is_sriov = true;
+
switch (adev->asic_type) {
case CHIP_TONGA:
case CHIP_FIJI:
vi_set_virt_ops(adev);
break;
case CHIP_VEGA10:
+ soc15_set_virt_ops(adev);
+#ifdef CONFIG_X86
+ /* not send GPU_INIT_DATA with MS_HYPERV*/
+ if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
+#endif
+ /* send a dummy GPU_INIT_DATA request to host on vega10 */
+ amdgpu_virt_request_init_data(adev);
+ break;
case CHIP_VEGA20:
case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
soc15_set_virt_ops(adev);
break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
+ case CHIP_IP_DISCOVERY:
nv_set_virt_ops(adev);
/* try send GPU_INIT_DATA request to host */
amdgpu_virt_request_init_data(adev);
break;
default: /* other chip doesn't support SRIOV */
- DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+ is_sriov = false;
+ dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
break;
}
}
+
+ return is_sriov;
+}
+
+static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
+{
+ ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_chk_criti_rs, 5 * HZ, 1);
+
+ ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_chk_criti_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+
+ mutex_init(&adev->virt.ras.ras_telemetry_mutex);
+ mutex_init(&adev->virt.access_req_mutex);
+
+ adev->virt.ras.cper_rptr = 0;
+}
+
+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
+{
+ uint32_t sum = 0;
+
+ if (buf_start >= buf_end)
+ return 0;
+
+ for (; buf_start < buf_end; buf_start++)
+ sum += buf_start[0];
+
+ return 0xffffffff - sum;
+}
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
+ u64 init_hdr_offset = adev->virt.init_data_header.offset;
+ u64 init_hdr_size = (u64)adev->virt.init_data_header.size_kb << 10; /* KB → bytes */
+ u64 vram_size;
+ u64 end;
+ int r = 0;
+ uint8_t checksum = 0;
+
+ /* Skip below init if critical region version != v2 */
+ if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
+ return 0;
+
+ if (init_hdr_offset < 0) {
+ dev_err(adev->dev, "Invalid init header offset\n");
+ return -EINVAL;
+ }
+
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ return -EINVAL;
+ vram_size <<= 20;
+
+ if (check_add_overflow(init_hdr_offset, init_hdr_size, &end) || end > vram_size) {
+ dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
+ return -EINVAL;
+ }
+
+ /* Allocate for init_data_hdr */
+ init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
+ if (!init_data_hdr)
+ return -ENOMEM;
+
+ amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
+ sizeof(struct amd_sriov_msg_init_data_header), false);
+
+ /* Table validation */
+ if (strncmp(init_data_hdr->signature,
+ AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
+ AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
+ dev_err(adev->dev, "Invalid init data signature: %.4s\n",
+ init_data_hdr->signature);
+ r = -EINVAL;
+ goto out;
+ }
+
+ checksum = amdgpu_virt_crit_region_calc_checksum(
+ (uint8_t *)&init_data_hdr->initdata_offset,
+ (uint8_t *)init_data_hdr +
+ sizeof(struct amd_sriov_msg_init_data_header));
+ if (checksum != init_data_hdr->checksum) {
+ dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
+ checksum, init_data_hdr->checksum);
+ r = -EINVAL;
+ goto out;
+ }
+
+ memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn));
+ memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl));
+
+ adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
+ adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
+
+ /* Validation and initialization for each table entry */
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) {
+ if (!init_data_hdr->ip_discovery_size_in_kb ||
+ init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID],
+ init_data_hdr->ip_discovery_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
+ init_data_hdr->ip_discovery_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
+ init_data_hdr->ip_discovery_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) {
+ if (!init_data_hdr->vbios_img_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID],
+ init_data_hdr->vbios_img_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
+ init_data_hdr->vbios_img_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
+ init_data_hdr->vbios_img_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) {
+ if (!init_data_hdr->ras_tele_info_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID],
+ init_data_hdr->ras_tele_info_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
+ init_data_hdr->ras_tele_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
+ init_data_hdr->ras_tele_info_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) {
+ if (!init_data_hdr->dataexchange_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID],
+ init_data_hdr->dataexchange_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
+ init_data_hdr->dataexchange_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
+ init_data_hdr->dataexchange_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) {
+ if (!init_data_hdr->bad_page_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID],
+ init_data_hdr->bad_page_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
+ init_data_hdr->bad_page_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
+ init_data_hdr->bad_page_size_in_kb;
+ }
+
+ /* Validation for critical region info */
+ if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* reserved memory starts from crit region base offset with the size of 5MB */
+ adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
+ adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
+ dev_info(adev->dev,
+ "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n",
+ init_data_hdr->version,
+ adev->mman.fw_vram_usage_start_offset,
+ adev->mman.fw_vram_usage_size >> 10);
+
+ adev->virt.is_dynamic_crit_regn_enabled = true;
+
+out:
+ kfree(init_data_hdr);
+ init_data_hdr = NULL;
+
+ return r;
+}
+
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size)
+{
+ uint32_t data_offset = 0;
+ uint32_t data_size = 0;
+ enum amd_sriov_msg_table_id_enum data_table_id = data_id;
+
+ if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+ return -EINVAL;
+
+ data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
+ data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
+
+ /* Validate on input params */
+ if (!binary || !size || *size < (uint64_t)data_size)
+ return -EINVAL;
+
+ /* Proceed to copy the dynamic content */
+ amdgpu_device_vram_access(adev,
+ (uint64_t)data_offset, (uint32_t *)binary, data_size, false);
+ *size = (uint64_t)data_size;
+
+ dev_dbg(adev->dev,
+ "Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ amdgpu_virt_dynamic_crit_table_name[data_id], data_offset, data_size);
+
+ return 0;
+}
+
+void amdgpu_virt_init(struct amdgpu_device *adev)
+{
+ bool is_sriov = false;
+ uint32_t reg = amdgpu_virt_init_detect_asic(adev);
+
+ is_sriov = amdgpu_virt_init_req_data(adev, reg);
+
+ if (is_sriov)
+ amdgpu_virt_init_ras(adev);
}
static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
@@ -729,3 +1187,661 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad
return mode;
}
+
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
+{
+ /* stop the data exchange thread */
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
+}
+
+void amdgpu_virt_post_reset(struct amdgpu_device *adev)
+{
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
+ /* force set to GFXOFF state after reset,
+ * to avoid some invalid operation before GC enable
+ */
+ adev->gfx.is_poweron = false;
+ }
+
+ adev->mes.ring[0].sched.ready = false;
+}
+
+bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ /* no vf autoload, white list */
+ if (ucode_id == AMDGPU_UCODE_ID_VCN1 ||
+ ucode_id == AMDGPU_UCODE_ID_VCN)
+ return false;
+ else
+ return true;
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ /* black list for CHIP_NAVI12 and CHIP_SIENNA_CICHLID */
+ if (ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_SMC)
+ return true;
+ else
+ return false;
+ case IP_VERSION(13, 0, 10):
+ /* white list */
+ if (ucode_id == AMDGPU_UCODE_ID_CAP
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES1
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES1_DATA
+ || ucode_id == AMDGPU_UCODE_ID_VCN1
+ || ucode_id == AMDGPU_UCODE_ID_VCN)
+ return false;
+ else
+ return true;
+ default:
+ /* lagacy black list */
+ if (ucode_id == AMDGPU_UCODE_ID_SDMA0
+ || ucode_id == AMDGPU_UCODE_ID_SDMA1
+ || ucode_id == AMDGPU_UCODE_ID_SDMA2
+ || ucode_id == AMDGPU_UCODE_ID_SDMA3
+ || ucode_id == AMDGPU_UCODE_ID_SDMA4
+ || ucode_id == AMDGPU_UCODE_ID_SDMA5
+ || ucode_id == AMDGPU_UCODE_ID_SDMA6
+ || ucode_id == AMDGPU_UCODE_ID_SDMA7
+ || ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_SMC)
+ return true;
+ else
+ return false;
+ }
+}
+
+void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
+ struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
+ struct amdgpu_video_codec_info *decode, uint32_t decode_array_size)
+{
+ uint32_t i;
+
+ if (!adev->virt.is_mm_bw_enabled)
+ return;
+
+ if (encode) {
+ for (i = 0; i < encode_array_size; i++) {
+ encode[i].max_width = adev->virt.encode_max_dimension_pixels;
+ encode[i].max_pixels_per_frame = adev->virt.encode_max_frame_pixels;
+ if (encode[i].max_width > 0)
+ encode[i].max_height = encode[i].max_pixels_per_frame / encode[i].max_width;
+ else
+ encode[i].max_height = 0;
+ }
+ }
+
+ if (decode) {
+ for (i = 0; i < decode_array_size; i++) {
+ decode[i].max_width = adev->virt.decode_max_dimension_pixels;
+ decode[i].max_pixels_per_frame = adev->virt.decode_max_frame_pixels;
+ if (decode[i].max_width > 0)
+ decode[i].max_height = decode[i].max_pixels_per_frame / decode[i].max_width;
+ else
+ decode[i].max_height = 0;
+ }
+ }
+}
+
+bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+ u32 acc_flags, u32 hwip,
+ bool write, u32 *rlcg_flag)
+{
+ bool ret = false;
+
+ switch (hwip) {
+ case GC_HWIP:
+ if (amdgpu_sriov_reg_indirect_gc(adev)) {
+ *rlcg_flag =
+ write ? AMDGPU_RLCG_GC_WRITE : AMDGPU_RLCG_GC_READ;
+ ret = true;
+ /* only in new version, AMDGPU_REGS_NO_KIQ and
+ * AMDGPU_REGS_RLC are enabled simultaneously */
+ } else if ((acc_flags & AMDGPU_REGS_RLC) &&
+ !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) {
+ *rlcg_flag = AMDGPU_RLCG_GC_WRITE_LEGACY;
+ ret = true;
+ }
+ break;
+ case MMHUB_HWIP:
+ if (amdgpu_sriov_reg_indirect_mmhub(adev) &&
+ (acc_flags & AMDGPU_REGS_RLC) && write) {
+ *rlcg_flag = AMDGPU_RLCG_MMHUB_WRITE;
+ ret = true;
+ }
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+ uint32_t timeout = 50000;
+ uint32_t i, tmp;
+ uint32_t ret = 0;
+ void *scratch_reg0;
+ void *scratch_reg1;
+ void *scratch_reg2;
+ void *scratch_reg3;
+ void *spare_int;
+ unsigned long flags;
+
+ if (!adev->gfx.rlc.rlcg_reg_access_supported) {
+ dev_err(adev->dev,
+ "indirect registers access through rlcg is not available\n");
+ return 0;
+ }
+
+ if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
+ dev_err(adev->dev, "invalid xcc\n");
+ return 0;
+ }
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
+ scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0;
+ scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
+ scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
+ scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
+
+ spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
+
+ if (reg_access_ctrl->spare_int)
+ spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
+
+ if (offset == reg_access_ctrl->grbm_cntl) {
+ /* if the target reg offset is grbm_cntl, write to scratch_reg2 */
+ writel(v, scratch_reg2);
+ if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
+ writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+ } else if (offset == reg_access_ctrl->grbm_idx) {
+ /* if the target reg offset is grbm_idx, write to scratch_reg3 */
+ writel(v, scratch_reg3);
+ if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
+ writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+ } else {
+ /*
+ * SCRATCH_REG0 = read/write value
+ * SCRATCH_REG1[30:28] = command
+ * SCRATCH_REG1[19:0] = address in dword
+ * SCRATCH_REG1[27:24] = Error reporting
+ */
+ writel(v, scratch_reg0);
+ writel((offset | flag), scratch_reg1);
+ if (reg_access_ctrl->spare_int)
+ writel(1, spare_int);
+
+ for (i = 0; i < timeout; i++) {
+ tmp = readl(scratch_reg1);
+ if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK))
+ break;
+ udelay(10);
+ }
+
+ tmp = readl(scratch_reg1);
+ if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) {
+ if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
+ if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
+ dev_err(adev->dev,
+ "vfgate is disabled, rlcg failed to program reg: 0x%05x\n", offset);
+ } else if (tmp & AMDGPU_RLCG_WRONG_OPERATION_TYPE) {
+ dev_err(adev->dev,
+ "wrong operation type, rlcg failed to program reg: 0x%05x\n", offset);
+ } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
+ dev_err(adev->dev,
+ "register is not in range, rlcg failed to program reg: 0x%05x\n", offset);
+ } else {
+ dev_err(adev->dev,
+ "unknown error type, rlcg failed to program reg: 0x%05x\n", offset);
+ }
+ } else {
+ dev_err(adev->dev,
+ "timeout: rlcg faled to program reg: 0x%05x\n", offset);
+ }
+ }
+ }
+
+ ret = readl(scratch_reg0);
+
+ spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);
+
+ return ret;
+}
+
+void amdgpu_sriov_wreg(struct amdgpu_device *adev,
+ u32 offset, u32 value,
+ u32 acc_flags, u32 hwip, u32 xcc_id)
+{
+ u32 rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (!amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
+ return;
+ }
+
+ if (acc_flags & AMDGPU_REGS_NO_KIQ)
+ WREG32_NO_KIQ(offset, value);
+ else
+ WREG32(offset, value);
+}
+
+u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
+ u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id)
+{
+ u32 rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (!amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag))
+ return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id);
+
+ if (acc_flags & AMDGPU_REGS_NO_KIQ)
+ return RREG32_NO_KIQ(offset);
+ else
+ return RREG32(offset);
+}
+
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
+{
+ bool xnack_mode = true;
+
+ if (amdgpu_sriov_vf(adev) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ xnack_mode = false;
+
+ return xnack_mode;
+}
+
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_sriov_ras_caps_en(adev))
+ return false;
+
+ if (adev->virt.ras_en_caps.bits.block_umc)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC);
+ if (adev->virt.ras_en_caps.bits.block_sdma)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA);
+ if (adev->virt.ras_en_caps.bits.block_gfx)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX);
+ if (adev->virt.ras_en_caps.bits.block_mmhub)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB);
+ if (adev->virt.ras_en_caps.bits.block_athub)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB);
+ if (adev->virt.ras_en_caps.bits.block_pcie_bif)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF);
+ if (adev->virt.ras_en_caps.bits.block_hdp)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP);
+ if (adev->virt.ras_en_caps.bits.block_xgmi_wafl)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL);
+ if (adev->virt.ras_en_caps.bits.block_df)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF);
+ if (adev->virt.ras_en_caps.bits.block_smn)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN);
+ if (adev->virt.ras_en_caps.bits.block_sem)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM);
+ if (adev->virt.ras_en_caps.bits.block_mp0)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0);
+ if (adev->virt.ras_en_caps.bits.block_mp1)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1);
+ if (adev->virt.ras_en_caps.bits.block_fuse)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE);
+ if (adev->virt.ras_en_caps.bits.block_mca)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MCA);
+ if (adev->virt.ras_en_caps.bits.block_vcn)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN);
+ if (adev->virt.ras_en_caps.bits.block_jpeg)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG);
+ if (adev->virt.ras_en_caps.bits.block_ih)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH);
+ if (adev->virt.ras_en_caps.bits.block_mpio)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO);
+
+ if (adev->virt.ras_en_caps.bits.poison_propogation_mode)
+ con->poison_supported = true; /* Poison is handled by host */
+
+ return true;
+}
+
+static inline enum amd_sriov_ras_telemetry_gpu_block
+amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block) {
+ switch (block) {
+ case AMDGPU_RAS_BLOCK__UMC:
+ return RAS_TELEMETRY_GPU_BLOCK_UMC;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ return RAS_TELEMETRY_GPU_BLOCK_SDMA;
+ case AMDGPU_RAS_BLOCK__GFX:
+ return RAS_TELEMETRY_GPU_BLOCK_GFX;
+ case AMDGPU_RAS_BLOCK__MMHUB:
+ return RAS_TELEMETRY_GPU_BLOCK_MMHUB;
+ case AMDGPU_RAS_BLOCK__ATHUB:
+ return RAS_TELEMETRY_GPU_BLOCK_ATHUB;
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ return RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF;
+ case AMDGPU_RAS_BLOCK__HDP:
+ return RAS_TELEMETRY_GPU_BLOCK_HDP;
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ return RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL;
+ case AMDGPU_RAS_BLOCK__DF:
+ return RAS_TELEMETRY_GPU_BLOCK_DF;
+ case AMDGPU_RAS_BLOCK__SMN:
+ return RAS_TELEMETRY_GPU_BLOCK_SMN;
+ case AMDGPU_RAS_BLOCK__SEM:
+ return RAS_TELEMETRY_GPU_BLOCK_SEM;
+ case AMDGPU_RAS_BLOCK__MP0:
+ return RAS_TELEMETRY_GPU_BLOCK_MP0;
+ case AMDGPU_RAS_BLOCK__MP1:
+ return RAS_TELEMETRY_GPU_BLOCK_MP1;
+ case AMDGPU_RAS_BLOCK__FUSE:
+ return RAS_TELEMETRY_GPU_BLOCK_FUSE;
+ case AMDGPU_RAS_BLOCK__MCA:
+ return RAS_TELEMETRY_GPU_BLOCK_MCA;
+ case AMDGPU_RAS_BLOCK__VCN:
+ return RAS_TELEMETRY_GPU_BLOCK_VCN;
+ case AMDGPU_RAS_BLOCK__JPEG:
+ return RAS_TELEMETRY_GPU_BLOCK_JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return RAS_TELEMETRY_GPU_BLOCK_IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return RAS_TELEMETRY_GPU_BLOCK_MPIO;
+ default:
+ dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
+ block);
+ return RAS_TELEMETRY_GPU_BLOCK_COUNT;
+ }
+}
+
+static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry)
+{
+ struct amd_sriov_ras_telemetry_error_count *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ memcpy(&adev->virt.count_cache, tmp,
+ min(used_size, sizeof(adev->virt.count_cache)));
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bool force_update)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (!virt->ops || !virt->ops->req_ras_err_count)
+ return -EOPNOTSUPP;
+
+ /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host
+ * will ignore incoming guest messages. Ratelimit the guest messages to
+ * prevent guest self DOS.
+ */
+ if (__ratelimit(&virt->ras.ras_error_cnt_rs) || force_update) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_err_count(adev))
+ amdgpu_virt_cache_host_error_counts(adev,
+ virt->fw_reserve.ras_telemetry);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return 0;
+}
+
+/* Bypass ACA interface and query ECC counts directly from host */
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data)
+{
+ enum amd_sriov_ras_telemetry_gpu_block sriov_block;
+
+ sriov_block = amdgpu_ras_block_to_sriov(adev, block);
+
+ if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
+ !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
+ return -EOPNOTSUPP;
+
+ /* Host Access may be lost during reset, just return last cached data. */
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_req_ras_err_count_internal(adev, false);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ err_data->ue_count = adev->virt.count_cache.block[sriov_block].ue_count;
+ err_data->ce_count = adev->virt.count_cache.block[sriov_block].ce_count;
+ err_data->de_count = adev->virt.count_cache.block[sriov_block].de_count;
+
+ return 0;
+}
+
+static int
+amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ u32 *more)
+{
+ struct amd_sriov_ras_cper_dump *cper_dump = NULL;
+ struct cper_hdr *entry = NULL;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ uint32_t checksum, used_size, i;
+ int ret = 0;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+ return -EINVAL;
+
+ cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
+ if (!cper_dump)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *more = cper_dump->more;
+
+ if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
+ dev_warn(
+ adev->dev,
+ "guest specified rptr that was too high! guest rptr: 0x%llx, host rptr: 0x%llx\n",
+ adev->virt.ras.cper_rptr, cper_dump->wptr);
+
+ adev->virt.ras.cper_rptr = cper_dump->wptr;
+ goto out;
+ }
+
+ entry = (struct cper_hdr *)&cper_dump->buf[0];
+
+ for (i = 0; i < cper_dump->count; i++) {
+ amdgpu_cper_ring_write(ring, entry, entry->record_length);
+ entry = (struct cper_hdr *)((char *)entry +
+ entry->record_length);
+ }
+
+ if (cper_dump->overflow_count)
+ dev_warn(adev->dev,
+ "host reported CPER overflow of 0x%llx entries!\n",
+ cper_dump->overflow_count);
+
+ adev->virt.ras.cper_rptr = cper_dump->wptr;
+out:
+ kfree(cper_dump);
+
+ return ret;
+}
+
+static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int ret = 0;
+ uint32_t more = 0;
+
+ if (!virt->ops || !virt->ops->req_ras_cper_dump)
+ return -EOPNOTSUPP;
+
+ do {
+ if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
+ ret = amdgpu_virt_write_cpers_to_ring(
+ adev, virt->fw_reserve.ras_telemetry, &more);
+ else
+ ret = 0;
+ } while (more && !ret);
+
+ return ret;
+}
+
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int ret = 0;
+
+ if (!amdgpu_sriov_ras_cper_en(adev))
+ return -EOPNOTSUPP;
+
+ if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ return ret;
+}
+
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
+{
+ unsigned long ue_count, ce_count;
+
+ if (amdgpu_sriov_ras_telemetry_en(adev)) {
+ amdgpu_virt_req_ras_err_count_internal(adev, true);
+ amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL);
+ }
+
+ return 0;
+}
+
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ enum amd_sriov_ras_telemetry_gpu_block sriov_block;
+
+ sriov_block = amdgpu_ras_block_to_sriov(adev, block);
+
+ if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
+ !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
+ return false;
+
+ return true;
+}
+
+/*
+ * amdgpu_virt_request_bad_pages() - request bad pages
+ * @adev: amdgpu device.
+ * Send command to GPU hypervisor to write new bad pages into the shared PF2VF region
+ */
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->req_bad_pages)
+ virt->ops->req_bad_pages(adev);
+}
+
+static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ bool *hit)
+{
+ struct amd_sriov_ras_chk_criti *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ if (hit)
+ *hit = tmp->hit ? true : false;
+
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r = -EPERM;
+
+ if (!virt->ops || !virt->ops->req_ras_chk_criti)
+ return -EOPNOTSUPP;
+
+ /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host
+ * will ignore incoming guest messages. Ratelimit the guest messages to
+ * prevent guest self DOS.
+ */
+ if (__ratelimit(&virt->ras.ras_chk_criti_rs)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_chk_criti(adev, addr))
+ r = amdgpu_virt_cache_chk_criti_hit(
+ adev, virt->fw_reserve.ras_telemetry, hit);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 8dd624c20f89..01d5bca2dee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -31,12 +31,35 @@
#define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */
#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
+#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */
+
+/* flags for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28)
+#define AMDGPU_RLCG_GC_WRITE (0x0 << 28)
+#define AMDGPU_RLCG_GC_READ (0x1 << 28)
+#define AMDGPU_RLCG_MMHUB_WRITE (0x2 << 28)
+
+/* error code for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_VFGATE_DISABLED 0x4000000
+#define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000
+#define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000
+
+#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF
+#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000
/* all asic after AI use this offset */
#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
/* tonga/fiji use this offset */
#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
+#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
+
+/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */
+#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA"
+#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN 4
+
+#define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, id) ((hdr)->valid_tables & (1 << (id)))
+
enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,
SRIOV_VF_MODE_ONE_VF,
@@ -61,6 +84,8 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
};
+enum idh_request;
+
/**
* struct amdgpu_virt_ops - amdgpu device virt operations
*/
@@ -69,8 +94,17 @@ struct amdgpu_virt_ops {
int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
int (*req_init_data)(struct amdgpu_device *adev);
int (*reset_gpu)(struct amdgpu_device *adev);
+ void (*ready_to_reset)(struct amdgpu_device *adev);
int (*wait_reset)(struct amdgpu_device *adev);
- void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
+ void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
+ u32 data1, u32 data2, u32 data3);
+ void (*ras_poison_handler)(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+ bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
+ int (*req_ras_err_count)(struct amdgpu_device *adev);
+ int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
+ int (*req_bad_pages)(struct amdgpu_device *adev);
+ int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr);
};
/*
@@ -79,6 +113,7 @@ struct amdgpu_virt_ops {
struct amdgpu_virt_fw_reserve {
struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
+ void *ras_telemetry;
unsigned int checksum_key;
};
@@ -104,6 +139,31 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_GIM_MM_BW_MGR = 0x8,
/* PP ONE VF MODE in GIM */
AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
+ /* Indirect Reg Access enabled */
+ AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
+ /* AV1 Support MODE*/
+ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
+ /* VCN RB decouple */
+ AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
+ /* MES info */
+ AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
+ AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
+ AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
+ AMDGIM_FEATURE_RAS_CPER = (1 << 11),
+ AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK = (1 << 12),
+};
+
+enum AMDGIM_REG_ACCESS_FLAG {
+ /* Use PSP to program IH_RB_CNTL */
+ AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
+ /* Use RLC to program MMHUB regs */
+ AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
+ /* Use RLC to program GC regs */
+ AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+ /* Use PSP to program L1_TLB_CNTL */
+ AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3),
+ /* Use RLCG to program SQ_CONFIG1 */
+ AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4),
};
struct amdgim_pf2vf_info_v1 {
@@ -197,6 +257,23 @@ struct amdgpu_virt_ras_err_handler_data {
int last_reserved;
};
+struct amdgpu_virt_ras {
+ struct ratelimit_state ras_error_cnt_rs;
+ struct ratelimit_state ras_cper_dump_rs;
+ struct ratelimit_state ras_chk_criti_rs;
+ struct mutex ras_telemetry_mutex;
+ uint64_t cper_rptr;
+};
+
+#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT)
+
+DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
+
+struct amdgpu_virt_region {
+ uint32_t offset;
+ uint32_t size_kb;
+};
+
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@@ -206,23 +283,61 @@ struct amdgpu_virt {
uint32_t reg_val_offs;
struct amdgpu_irq_src ack_irq;
struct amdgpu_irq_src rcv_irq;
+
struct work_struct flr_work;
+ struct work_struct req_bad_pages_work;
+ struct work_struct handle_bad_pages_work;
+
struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
struct amdgpu_vf_error_buffer vf_errors;
struct amdgpu_virt_fw_reserve fw_reserve;
+ struct amdgpu_virt_caps virt_caps;
uint32_t gim_feature;
uint32_t reg_access_mode;
int req_init_data_ver;
bool tdr_debug;
struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
bool ras_init_done;
+ uint32_t reg_access;
+
+ /* dynamic(v2) critical regions */
+ struct amdgpu_virt_region init_data_header;
+ struct amdgpu_virt_region crit_regn;
+ struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
+ bool is_dynamic_crit_regn_enabled;
/* vf2pf message */
struct delayed_work vf2pf_work;
uint32_t vf2pf_update_interval_ms;
+ int vf2pf_update_retry_cnt;
+
+ /* multimedia bandwidth config */
+ bool is_mm_bw_enabled;
+ uint32_t decode_max_dimension_pixels;
+ uint32_t decode_max_frame_pixels;
+ uint32_t encode_max_dimension_pixels;
+ uint32_t encode_max_frame_pixels;
+
+ /* the ucode id to signal the autoload */
+ uint32_t autoload_ucode_id;
+
+ /* Spinlock to protect access to the RLCG register interface */
+ spinlock_t rlcg_reg_lock;
+
+ struct mutex access_req_mutex;
+
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+ struct amdgpu_virt_ras ras;
+ struct amd_sriov_ras_telemetry_error_count count_cache;
+
+ /* hibernate and resume with different VF feature for xgmi enabled system */
+ bool is_xgmi_node_migrate_enabled;
};
+struct amdgpu_video_codec_info;
+
#define amdgpu_sriov_enabled(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@@ -238,13 +353,60 @@ struct amdgpu_virt {
#define amdgpu_sriov_fullaccess(adev) \
(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))
+#define amdgpu_sriov_reg_indirect_en(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.gim_feature & (AMDGIM_FEATURE_INDIRECT_REG_ACCESS)))
+
+#define amdgpu_sriov_reg_indirect_ih(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_IH_REG_PSP_EN)))
+
+#define amdgpu_sriov_reg_indirect_mmhub(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_MMHUB_REG_RLC_EN)))
+
+#define amdgpu_sriov_reg_indirect_gc(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))
+
+#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN)))
+
+#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
+ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
+
+#define amdgpu_sriov_reg_access_sq_config(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG)))
+
#define amdgpu_passthrough(adev) \
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
+#define amdgpu_sriov_vf_mmio_access_protection(adev) \
+((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)
+
+#define amdgpu_sriov_ras_caps_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)
+
+#define amdgpu_sriov_ras_telemetry_en(adev) \
+(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry)
+
+#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
+(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))
+
+#define amdgpu_sriov_ras_cper_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
+
+#define amdgpu_sriov_xgmi_ta_ext_peer_link_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK)
+
static inline bool is_virtual_machine(void)
{
-#ifdef CONFIG_X86
+#if defined(CONFIG_X86)
return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#elif defined(CONFIG_ARM64)
+ return !is_kernel_in_hyp_mode();
#else
return false;
#endif
@@ -252,31 +414,73 @@ static inline bool is_virtual_machine(void)
#define amdgpu_sriov_is_pp_one_vf(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_multi_vf_mode(adev) \
+ (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
#define amdgpu_sriov_is_debug(adev) \
((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
#define amdgpu_sriov_is_normal(adev) \
((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
+#define amdgpu_sriov_is_av1_support(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
+#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
+#define amdgpu_sriov_is_mes_info_enable(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)
+
+#define amdgpu_virt_xgmi_migrate_enabled(adev) \
+ ((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0)
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
-void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
- uint32_t reg0, uint32_t rreg1,
- uint32_t ref, uint32_t mask);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev);
void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
-void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+void amdgpu_virt_init(struct amdgpu_device *adev);
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size);
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev);
+
+void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
+ struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
+ struct amdgpu_video_codec_info *decode, uint32_t decode_array_size);
+void amdgpu_sriov_wreg(struct amdgpu_device *adev,
+ u32 offset, u32 value,
+ u32 acc_flags, u32 hwip, u32 xcc_id);
+u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
+ u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
+bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
+ uint32_t ucode_id);
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
+void amdgpu_virt_post_reset(struct amdgpu_device *adev);
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
+bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+ u32 acc_flags, u32 hwip,
+ bool write, u32 *rlcg_flag);
+u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data);
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
new file mode 100644
index 000000000000..79bad9cbe2ab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -0,0 +1,659 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#ifdef CONFIG_DRM_AMDGPU_SI
+#include "dce_v6_0.h"
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#include "dce_v8_0.h"
+#endif
+#include "dce_v10_0.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+#include "amdgpu_vkms.h"
+#include "amdgpu_display.h"
+#include "atom.h"
+#include "amdgpu_irq.h"
+
+/**
+ * DOC: amdgpu_vkms
+ *
+ * The amdgpu vkms interface provides a virtual KMS interface for several use
+ * cases: devices without display hardware, platforms where the actual display
+ * hardware is not useful (e.g., servers), SR-IOV virtual functions, device
+ * emulation/simulation, and device bring up prior to display hardware being
+ * usable. We previously emulated a legacy KMS interface, but there was a desire
+ * to move to the atomic KMS interface. The vkms driver did everything we
+ * needed, but we wanted KMS support natively in the driver without buffer
+ * sharing and the ability to support an instance of VKMS per device. We first
+ * looked at splitting vkms into a stub driver and a helper module that other
+ * drivers could use to implement a virtual display, but this strategy ended up
+ * being messy due to driver specific callbacks needed for buffer management.
+ * Ultimately, it proved easier to import the vkms code as it mostly used core
+ * drm helpers anyway.
+ */
+
+static const u32 amdgpu_vkms_formats[] = {
+ DRM_FORMAT_XRGB8888,
+};
+
+static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
+{
+ struct amdgpu_crtc *amdgpu_crtc = container_of(timer, struct amdgpu_crtc, vblank_timer);
+ struct drm_crtc *crtc = &amdgpu_crtc->base;
+ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
+ u64 ret_overrun;
+ bool ret;
+
+ ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer,
+ output->period_ns);
+ if (ret_overrun != 1)
+ DRM_WARN("%s: vblank timer overrun\n", __func__);
+
+ ret = drm_crtc_handle_vblank(crtc);
+ /* Don't queue timer again when vblank is disabled. */
+ if (!ret)
+ return HRTIMER_NORESTART;
+
+ return HRTIMER_RESTART;
+}
+
+static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
+ struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_calc_timestamping_constants(crtc, &crtc->mode);
+
+ out->period_ns = ktime_set(0, vblank->framedur_ns);
+ hrtimer_start(&amdgpu_crtc->vblank_timer, out->period_ns, HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ hrtimer_try_to_cancel(&amdgpu_crtc->vblank_timer);
+}
+
+static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
+ int *max_error,
+ ktime_t *vblank_time,
+ bool in_vblank_irq)
+{
+ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!READ_ONCE(vblank->enabled)) {
+ *vblank_time = ktime_get();
+ return true;
+ }
+
+ *vblank_time = READ_ONCE(amdgpu_crtc->vblank_timer.node.expires);
+
+ if (WARN_ON(*vblank_time == vblank->time))
+ return true;
+
+ /*
+ * To prevent races we roll the hrtimer forward before we do any
+ * interrupt processing - this is how real hw works (the interrupt is
+ * only generated after all the vblank registers are updated) and what
+ * the vblank core expects. Therefore we need to always correct the
+ * timestampe by one frame.
+ */
+ *vblank_time -= output->period_ns;
+
+ return true;
+}
+
+static const struct drm_crtc_funcs amdgpu_vkms_crtc_funcs = {
+ .set_config = drm_atomic_helper_set_config,
+ .destroy = drm_crtc_cleanup,
+ .page_flip = drm_atomic_helper_page_flip,
+ .reset = drm_atomic_helper_crtc_reset,
+ .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+ .enable_vblank = amdgpu_vkms_enable_vblank,
+ .disable_vblank = amdgpu_vkms_disable_vblank,
+ .get_vblank_timestamp = amdgpu_vkms_get_vblank_timestamp,
+};
+
+static void amdgpu_vkms_crtc_atomic_enable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ drm_crtc_vblank_on(crtc);
+}
+
+static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ drm_crtc_vblank_off(crtc);
+}
+
+static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ unsigned long flags;
+ if (crtc->state->event) {
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ if (drm_crtc_vblank_get(crtc) != 0)
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+ else
+ drm_crtc_arm_vblank_event(crtc, crtc->state->event);
+
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+ crtc->state->event = NULL;
+ }
+}
+
+static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = {
+ .atomic_flush = amdgpu_vkms_crtc_atomic_flush,
+ .atomic_enable = amdgpu_vkms_crtc_atomic_enable,
+ .atomic_disable = amdgpu_vkms_crtc_atomic_disable,
+};
+
+static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
+ struct drm_plane *primary, struct drm_plane *cursor)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ int ret;
+
+ ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor,
+ &amdgpu_vkms_crtc_funcs, NULL);
+ if (ret) {
+ DRM_ERROR("Failed to init CRTC\n");
+ return ret;
+ }
+
+ drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs);
+
+ amdgpu_crtc->crtc_id = drm_crtc_index(crtc);
+ adev->mode_info.crtcs[drm_crtc_index(crtc)] = amdgpu_crtc;
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
+
+ hrtimer_setup(&amdgpu_crtc->vblank_timer, &amdgpu_vkms_vblank_simulate, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+
+ return ret;
+}
+
+static const struct drm_connector_funcs amdgpu_vkms_connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = drm_connector_cleanup,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct drm_display_mode *mode = NULL;
+ unsigned i;
+ static const struct mode_size {
+ int w;
+ int h;
+ } common_modes[] = {
+ { 640, 480},
+ { 720, 480},
+ { 800, 600},
+ { 848, 480},
+ {1024, 768},
+ {1152, 768},
+ {1280, 720},
+ {1280, 800},
+ {1280, 854},
+ {1280, 960},
+ {1280, 1024},
+ {1440, 900},
+ {1400, 1050},
+ {1680, 1050},
+ {1600, 1200},
+ {1920, 1080},
+ {1920, 1200},
+ {2560, 1440},
+ {4096, 3112},
+ {3656, 2664},
+ {3840, 2160},
+ {4096, 2160},
+ };
+
+ for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
+ mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ continue;
+ drm_mode_probed_add(connector, mode);
+ }
+
+ drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF);
+
+ return ARRAY_SIZE(common_modes);
+}
+
+static const struct drm_connector_helper_funcs amdgpu_vkms_conn_helper_funcs = {
+ .get_modes = amdgpu_vkms_conn_get_modes,
+};
+
+static const struct drm_plane_funcs amdgpu_vkms_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ .destroy = drm_plane_cleanup,
+ .reset = drm_atomic_helper_plane_reset,
+ .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+static void amdgpu_vkms_plane_atomic_update(struct drm_plane *plane,
+ struct drm_atomic_state *old_state)
+{
+ return;
+}
+
+static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
+ plane);
+ struct drm_crtc_state *crtc_state;
+ int ret;
+
+ if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc))
+ return 0;
+
+ crtc_state = drm_atomic_get_crtc_state(state,
+ new_plane_state->crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state,
+ DRM_PLANE_NO_SCALING,
+ DRM_PLANE_NO_SCALING,
+ false, true);
+ if (ret != 0)
+ return ret;
+
+ /* for now primary plane must be visible and full screen */
+ if (!new_plane_state->visible)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
+ struct drm_plane_state *new_state)
+{
+ struct amdgpu_framebuffer *afb;
+ struct drm_gem_object *obj;
+ struct amdgpu_device *adev;
+ struct amdgpu_bo *rbo;
+ uint32_t domain;
+ int r;
+
+ if (!new_state->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+ afb = to_amdgpu_framebuffer(new_state->fb);
+
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
+ adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+
+ r = amdgpu_bo_reserve(rbo, true);
+ if (r) {
+ dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
+ return r;
+ }
+
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+ if (r) {
+ dev_err(adev->dev, "allocating fence slot failed (%d)\n", r);
+ goto error_unlock;
+ }
+
+ if (plane->type != DRM_PLANE_TYPE_CURSOR)
+ domain = amdgpu_display_supported_domains(adev, rbo->flags);
+ else
+ domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(rbo, domain);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+ goto error_unlock;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", rbo);
+ goto error_unpin;
+ }
+
+ amdgpu_bo_unreserve(rbo);
+
+ afb->address = amdgpu_bo_gpu_offset(rbo);
+
+ amdgpu_bo_ref(rbo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(rbo);
+
+error_unlock:
+ amdgpu_bo_unreserve(rbo);
+ return r;
+}
+
+static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ struct amdgpu_bo *rbo;
+ struct drm_gem_object *obj;
+ int r;
+
+ if (!old_state->fb)
+ return;
+
+ obj = drm_gem_fb_get_obj(old_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(rbo, false);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve rbo before unpin\n");
+ return;
+ }
+
+ amdgpu_bo_unpin(rbo);
+ amdgpu_bo_unreserve(rbo);
+ amdgpu_bo_unref(&rbo);
+}
+
+static const struct drm_plane_helper_funcs amdgpu_vkms_primary_helper_funcs = {
+ .atomic_update = amdgpu_vkms_plane_atomic_update,
+ .atomic_check = amdgpu_vkms_plane_atomic_check,
+ .prepare_fb = amdgpu_vkms_prepare_fb,
+ .cleanup_fb = amdgpu_vkms_cleanup_fb,
+};
+
+static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev,
+ enum drm_plane_type type,
+ int index)
+{
+ struct drm_plane *plane;
+ int ret;
+
+ plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+ if (!plane)
+ return ERR_PTR(-ENOMEM);
+
+ ret = drm_universal_plane_init(dev, plane, 1 << index,
+ &amdgpu_vkms_plane_funcs,
+ amdgpu_vkms_formats,
+ ARRAY_SIZE(amdgpu_vkms_formats),
+ NULL, type, NULL);
+ if (ret) {
+ kfree(plane);
+ return ERR_PTR(ret);
+ }
+
+ drm_plane_helper_add(plane, &amdgpu_vkms_primary_helper_funcs);
+
+ return plane;
+}
+
+static int amdgpu_vkms_output_init(struct drm_device *dev, struct
+ amdgpu_vkms_output *output, int index)
+{
+ struct drm_connector *connector = &output->connector;
+ struct drm_encoder *encoder = &output->encoder;
+ struct drm_crtc *crtc = &output->crtc.base;
+ struct drm_plane *primary, *cursor = NULL;
+ int ret;
+
+ primary = amdgpu_vkms_plane_init(dev, DRM_PLANE_TYPE_PRIMARY, index);
+ if (IS_ERR(primary))
+ return PTR_ERR(primary);
+
+ ret = amdgpu_vkms_crtc_init(dev, crtc, primary, cursor);
+ if (ret)
+ goto err_crtc;
+
+ ret = drm_connector_init(dev, connector, &amdgpu_vkms_connector_funcs,
+ DRM_MODE_CONNECTOR_VIRTUAL);
+ if (ret) {
+ DRM_ERROR("Failed to init connector\n");
+ goto err_connector;
+ }
+
+ drm_connector_helper_add(connector, &amdgpu_vkms_conn_helper_funcs);
+
+ ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL);
+ if (ret) {
+ DRM_ERROR("Failed to init encoder\n");
+ goto err_encoder;
+ }
+ encoder->possible_crtcs = 1 << index;
+
+ ret = drm_connector_attach_encoder(connector, encoder);
+ if (ret) {
+ DRM_ERROR("Failed to attach connector to encoder\n");
+ goto err_attach;
+ }
+
+ drm_mode_config_reset(dev);
+
+ return 0;
+
+err_attach:
+ drm_encoder_cleanup(encoder);
+
+err_encoder:
+ drm_connector_cleanup(connector);
+
+err_connector:
+ drm_crtc_cleanup(crtc);
+
+err_crtc:
+ drm_plane_cleanup(primary);
+
+ return ret;
+}
+
+const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = {
+ .fb_create = amdgpu_display_user_framebuffer_create,
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = drm_atomic_helper_commit,
+};
+
+static int amdgpu_vkms_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc,
+ sizeof(struct amdgpu_vkms_output), GFP_KERNEL);
+ if (!adev->amdgpu_vkms_output)
+ return -ENOMEM;
+
+ adev_to_drm(adev)->max_vblank_count = 0;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_vkms_mode_funcs;
+
+ adev_to_drm(adev)->mode_config.max_width = XRES_MAX;
+ adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
+
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ /* allocate crtcs, encoders, connectors */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_vkms_output_init(adev_to_drm(adev), &adev->amdgpu_vkms_output[i], i);
+ if (r)
+ return r;
+ }
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
+}
+
+static int amdgpu_vkms_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i = 0;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ if (adev->mode_info.crtcs[i])
+ hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+ drm_mode_config_cleanup(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = false;
+
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
+ kfree(adev->amdgpu_vkms_output);
+ return 0;
+}
+
+static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ dce_v6_0_disable_dce(adev);
+ break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ dce_v8_0_disable_dce(adev);
+ break;
+#endif
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ dce_v10_0_disable_dce(adev);
+ break;
+ case CHIP_TOPAZ:
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ /* no DCE */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int amdgpu_vkms_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+static int amdgpu_vkms_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = drm_mode_config_helper_suspend(adev_to_drm(adev));
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int amdgpu_vkms_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = amdgpu_vkms_hw_init(ip_block);
+ if (r)
+ return r;
+ return drm_mode_config_helper_resume(adev_to_drm(ip_block->adev));
+}
+
+static bool amdgpu_vkms_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
+ .name = "amdgpu_vkms",
+ .sw_init = amdgpu_vkms_sw_init,
+ .sw_fini = amdgpu_vkms_sw_fini,
+ .hw_init = amdgpu_vkms_hw_init,
+ .hw_fini = amdgpu_vkms_hw_fini,
+ .suspend = amdgpu_vkms_suspend,
+ .resume = amdgpu_vkms_resume,
+ .is_idle = amdgpu_vkms_is_idle,
+ .set_clockgating_state = amdgpu_vkms_set_clockgating_state,
+ .set_powergating_state = amdgpu_vkms_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &amdgpu_vkms_ip_funcs,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
new file mode 100644
index 000000000000..4f8722ff37c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _AMDGPU_VKMS_H_
+#define _AMDGPU_VKMS_H_
+
+#define XRES_DEF 1024
+#define YRES_DEF 768
+
+#define XRES_MAX 16384
+#define YRES_MAX 16384
+
+#define drm_crtc_to_amdgpu_vkms_output(target) \
+ container_of(target, struct amdgpu_vkms_output, crtc.base)
+
+extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block;
+
+struct amdgpu_vkms_output {
+ struct amdgpu_crtc crtc;
+ struct drm_encoder encoder;
+ struct drm_connector connector;
+ ktime_t period_ns;
+ struct drm_pending_vblank_event *event;
+};
+
+#endif /* _AMDGPU_VKMS_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ad91c0c3c423..a67285118c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -25,38 +25,66 @@
* Alex Deucher
* Jerome Glisse
*/
+
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <linux/idr.h>
#include <linux/dma-buf.h>
#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
#include "amdgpu.h"
+#include "amdgpu_vm.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_dma_buf.h"
+#include "amdgpu_res_cursor.h"
+#include "kfd_svm.h"
/**
* DOC: GPUVM
*
- * GPUVM is similar to the legacy gart on older asics, however
- * rather than there being a single global gart table
- * for the entire GPU, there are multiple VM page tables active
- * at any given time. The VM page tables can contain a mix
- * vram pages and system memory pages and system memory pages
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time. The GPUVM page tables can contain a mix
+ * VRAM pages and system pages (both memory and MMIO) and system pages
* can be mapped as snooped (cached system pages) or unsnooped
* (uncached system pages).
- * Each VM has an ID associated with it and there is a page table
- * associated with each VMID. When execting a command buffer,
- * the kernel tells the the ring what VMID to use for that command
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID. When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
* buffer. VMIDs are allocated dynamically as commands are submitted.
* The userspace drivers maintain their own address space and the kernel
* sets up their pages tables accordingly when they submit their
* command buffers and a VMID is assigned.
- * Cayman/Trinity support up to 8 active VMs at any given time;
- * SI supports 16.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family. GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special. It is the GPUVM used for the kernel driver. In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures. There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present). These apertures provide direct access to these memories without
+ * incurring the overhead of a page table. VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have their own unique GPUVM
+ * address space. The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process. If an GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
*/
#define START(node) ((node)->start)
@@ -84,132 +112,30 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
};
-/*
- * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
- * happens while holding this lock anywhere to prevent deadlocks when
- * an MMU notifier runs in reclaim-FS context.
- */
-static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
-{
- mutex_lock(&vm->eviction_lock);
- vm->saved_flags = memalloc_nofs_save();
-}
-
-static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
-{
- if (mutex_trylock(&vm->eviction_lock)) {
- vm->saved_flags = memalloc_nofs_save();
- return 1;
- }
- return 0;
-}
-
-static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
-{
- memalloc_nofs_restore(vm->saved_flags);
- mutex_unlock(&vm->eviction_lock);
-}
-
-/**
- * amdgpu_vm_level_shift - return the addr shift for each level
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The number of bits the pfn needs to be right shifted for a level.
- */
-static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
- unsigned level)
-{
- switch (level) {
- case AMDGPU_VM_PDB2:
- case AMDGPU_VM_PDB1:
- case AMDGPU_VM_PDB0:
- return 9 * (AMDGPU_VM_PDB0 - level) +
- adev->vm_manager.block_size;
- case AMDGPU_VM_PTB:
- return 0;
- default:
- return ~0;
- }
-}
-
/**
- * amdgpu_vm_num_entries - return the number of entries in a PD/PT
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The number of entries in a page directory or page table.
- */
-static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
- unsigned level)
-{
- unsigned shift = amdgpu_vm_level_shift(adev,
- adev->vm_manager.root_level);
-
- if (level == adev->vm_manager.root_level)
- /* For the root directory */
- return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
- >> shift;
- else if (level != AMDGPU_VM_PTB)
- /* Everything in between */
- return 512;
- else
- /* For the page tables on the leaves */
- return AMDGPU_VM_PTE_COUNT(adev);
-}
-
-/**
- * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD
- *
- * @adev: amdgpu_device pointer
- *
- * Returns:
- * The number of entries in the root page directory which needs the ATS setting.
+ * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
*/
-static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev)
-{
- unsigned shift;
-
- shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level);
- return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
-}
+struct amdgpu_vm_tlb_seq_struct {
+ /**
+ * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
+ */
+ struct amdgpu_vm *vm;
-/**
- * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The mask to extract the entry number of a PD/PT from an address.
- */
-static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
- unsigned int level)
-{
- if (level <= adev->vm_manager.root_level)
- return 0xffffffff;
- else if (level != AMDGPU_VM_PTB)
- return 0x1ff;
- else
- return AMDGPU_VM_PTE_COUNT(adev) - 1;
-}
+ /**
+ * @cb: callback
+ */
+ struct dma_fence_cb cb;
+};
/**
- * amdgpu_vm_bo_size - returns the size of the BOs in bytes
+ * amdgpu_vm_assert_locked - check if VM is correctly locked
+ * @vm: the VM which schould be tested
*
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The size of the BO for a page directory or page table in bytes.
+ * Asserts that the VM root PD is locked.
*/
-static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
+static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
{
- return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
}
/**
@@ -226,10 +152,13 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
struct amdgpu_bo *bo = vm_bo->bo;
vm_bo->moved = true;
+ amdgpu_vm_assert_locked(vm);
+ spin_lock(&vm_bo->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm->evicted);
else
list_move_tail(&vm_bo->vm_status, &vm->evicted);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
* amdgpu_vm_bo_moved - vm_bo is moved
@@ -241,7 +170,10 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -254,7 +186,10 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+ spin_unlock(&vm_bo->vm->status_lock);
vm_bo->moved = false;
}
@@ -268,9 +203,25 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_evicted_user - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for BOs used by user mode queues which are not at the location they
+ * should be.
+ */
+static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
+{
+ vm_bo->moved = true;
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -283,10 +234,14 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
- if (vm_bo->bo->parent)
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ if (vm_bo->bo->parent) {
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
- else
+ spin_unlock(&vm_bo->vm->status_lock);
+ } else {
amdgpu_vm_bo_idle(vm_bo);
+ }
}
/**
@@ -299,330 +254,256 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->done);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
- * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
- *
- * @base: base structure for tracking BO usage in a VM
- * @vm: vm to which bo is to be added
- * @bo: amdgpu buffer object
- *
- * Initialize a bo_va_base structure and add it to the appropriate lists
+ * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
+ * @vm: the VM which state machine to reset
*
+ * Move all vm_bo object in the VM into a state where they will be updated
+ * again during validation.
*/
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
- struct amdgpu_vm *vm,
- struct amdgpu_bo *bo)
+static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
{
- base->vm = vm;
- base->bo = bo;
- base->next = NULL;
- INIT_LIST_HEAD(&base->vm_status);
+ struct amdgpu_vm_bo_base *vm_bo, *tmp;
- if (!bo)
- return;
- base->next = bo->vm_bo;
- bo->vm_bo = base;
+ amdgpu_vm_assert_locked(vm);
- if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv)
- return;
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->done, &vm->invalidated);
+ list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
+ vm_bo->moved = true;
- vm->bulk_moveable = false;
- if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
- amdgpu_vm_bo_relocated(base);
- else
- amdgpu_vm_bo_idle(base);
-
- if (bo->preferred_domains &
- amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
- return;
+ list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
+ struct amdgpu_bo *bo = vm_bo->bo;
- /*
- * we checked all the prerequisites, but it looks like this per vm bo
- * is currently evicted. add the bo to the evicted list to make sure it
- * is validated on next vm use to avoid fault.
- * */
- amdgpu_vm_bo_evicted(base);
+ vm_bo->moved = true;
+ if (!bo || bo->tbo.type != ttm_bo_type_kernel)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ else if (bo->parent)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+ }
+ spin_unlock(&vm->status_lock);
}
/**
- * amdgpu_vm_pt_parent - get the parent page directory
- *
- * @pt: child page table
+ * amdgpu_vm_update_shared - helper to update shared memory stat
+ * @base: base structure for tracking BO usage in a VM
*
- * Helper to get the parent entry for the child page table. NULL if we are at
- * the root page directory.
- */
-static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
-{
- struct amdgpu_bo *parent = pt->base.bo->parent;
-
- if (!parent)
- return NULL;
-
- return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
+ * as well.
+ */
+static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ uint64_t size = amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+ bool shared;
+
+ dma_resv_assert_held(bo->tbo.base.resv);
+ spin_lock(&vm->status_lock);
+ shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ if (base->shared != shared) {
+ base->shared = shared;
+ if (shared) {
+ vm->stats[bo_memtype].drm.shared += size;
+ vm->stats[bo_memtype].drm.private -= size;
+ } else {
+ vm->stats[bo_memtype].drm.shared -= size;
+ vm->stats[bo_memtype].drm.private += size;
+ }
+ }
+ spin_unlock(&vm->status_lock);
}
-/*
- * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
- */
-struct amdgpu_vm_pt_cursor {
- uint64_t pfn;
- struct amdgpu_vm_pt *parent;
- struct amdgpu_vm_pt *entry;
- unsigned level;
-};
-
/**
- * amdgpu_vm_pt_start - start PD/PT walk
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm structure
- * @start: start address of the walk
- * @cursor: state to initialize
+ * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
+ * @bo: amdgpu buffer object
*
- * Initialize a amdgpu_vm_pt_cursor to start a walk.
+ * Update the per VM stats for all the vm if needed from private to shared or
+ * vice versa.
*/
-static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, uint64_t start,
- struct amdgpu_vm_pt_cursor *cursor)
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
{
- cursor->pfn = start;
- cursor->parent = NULL;
- cursor->entry = &vm->root;
- cursor->level = adev->vm_manager.root_level;
+ struct amdgpu_vm_bo_base *base;
+
+ for (base = bo->vm_bo; base; base = base->next)
+ amdgpu_vm_update_shared(base);
}
/**
- * amdgpu_vm_pt_descendant - go to child node
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
+ * amdgpu_vm_update_stats_locked - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
*
- * Walk to the child node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
+ * Caller need to have the vm status_lock held. Useful for when multiple update
+ * need to happen at the same time.
*/
-static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
+static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
{
- unsigned mask, shift, idx;
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ int64_t size = sign * amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
- if (!cursor->entry->entries)
- return false;
+ /* For drm-total- and drm-shared-, BO are accounted by their preferred
+ * placement, see also amdgpu_bo_mem_stats_placement.
+ */
+ if (base->shared)
+ vm->stats[bo_memtype].drm.shared += size;
+ else
+ vm->stats[bo_memtype].drm.private += size;
- BUG_ON(!cursor->entry->base.bo);
- mask = amdgpu_vm_entries_mask(adev, cursor->level);
- shift = amdgpu_vm_level_shift(adev, cursor->level);
+ if (res && res->mem_type < __AMDGPU_PL_NUM) {
+ uint32_t res_memtype = res->mem_type;
- ++cursor->level;
- idx = (cursor->pfn >> shift) & mask;
- cursor->parent = cursor->entry;
- cursor->entry = &cursor->entry->entries[idx];
- return true;
+ vm->stats[res_memtype].drm.resident += size;
+ /* BO only count as purgeable if it is resident,
+ * since otherwise there's nothing to purge.
+ */
+ if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
+ vm->stats[res_memtype].drm.purgeable += size;
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+ vm->stats[bo_memtype].evicted += size;
+ }
}
/**
- * amdgpu_vm_pt_sibling - go to sibling node
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
+ * amdgpu_vm_update_stats - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
*
- * Walk to the sibling node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
+ * Updates the basic memory stat when bo is added/deleted/moved.
*/
-static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
{
- unsigned shift, num_entries;
-
- /* Root doesn't have a sibling */
- if (!cursor->parent)
- return false;
-
- /* Go to our parents and see if we got a sibling */
- shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
- num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1);
-
- if (cursor->entry == &cursor->parent->entries[num_entries - 1])
- return false;
+ struct amdgpu_vm *vm = base->vm;
- cursor->pfn += 1ULL << shift;
- cursor->pfn &= ~((1ULL << shift) - 1);
- ++cursor->entry;
- return true;
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(base, res, sign);
+ spin_unlock(&vm->status_lock);
}
/**
- * amdgpu_vm_pt_ancestor - go to parent node
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
*
- * @cursor: current state
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
+ *
+ * Initialize a bo_va_base structure and add it to the appropriate lists
*
- * Walk to the parent node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
*/
-static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo)
{
- if (!cursor->parent)
- return false;
+ base->vm = vm;
+ base->bo = bo;
+ base->next = NULL;
+ INIT_LIST_HEAD(&base->vm_status);
- --cursor->level;
- cursor->entry = cursor->parent;
- cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
- return true;
-}
+ if (!bo)
+ return;
+ base->next = bo->vm_bo;
+ bo->vm_bo = base;
-/**
- * amdgpu_vm_pt_next - get next PD/PT in hieratchy
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
- *
- * Walk the PD/PT tree to the next node.
- */
-static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- /* First try a newborn child */
- if (amdgpu_vm_pt_descendant(adev, cursor))
+ spin_lock(&vm->status_lock);
+ base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
+ spin_unlock(&vm->status_lock);
+
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
- /* If that didn't worked try to find a sibling */
- while (!amdgpu_vm_pt_sibling(adev, cursor)) {
- /* No sibling, go to our parents and grandparents */
- if (!amdgpu_vm_pt_ancestor(cursor)) {
- cursor->pfn = ~0ll;
- return;
- }
- }
-}
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
-/**
- * amdgpu_vm_pt_first_dfs - start a deep first search
- *
- * @adev: amdgpu_device structure
- * @vm: amdgpu_vm structure
- * @start: optional cursor to start with
- * @cursor: state to initialize
- *
- * Starts a deep first traversal of the PD/PT tree.
- */
-static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- if (start)
- *cursor = *start;
+ ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
+ if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
+ amdgpu_vm_bo_relocated(base);
else
- amdgpu_vm_pt_start(adev, vm, 0, cursor);
- while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue
- *
- * @start: starting point for the search
- * @entry: current entry
- *
- * Returns:
- * True when the search should continue, false otherwise.
- */
-static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
- struct amdgpu_vm_pt *entry)
-{
- return entry && (!start || entry != start->entry);
-}
+ amdgpu_vm_bo_idle(base);
-/**
- * amdgpu_vm_pt_next_dfs - get the next node for a deep first search
- *
- * @adev: amdgpu_device structure
- * @cursor: current state
- *
- * Move the cursor to the next node in a deep first search.
- */
-static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- if (!cursor->entry)
+ if (bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
return;
- if (!cursor->parent)
- cursor->entry = NULL;
- else if (amdgpu_vm_pt_sibling(adev, cursor))
- while (amdgpu_vm_pt_descendant(adev, cursor));
- else
- amdgpu_vm_pt_ancestor(cursor);
+ /*
+ * we checked all the prerequisites, but it looks like this per vm bo
+ * is currently evicted. add the bo to the evicted list to make sure it
+ * is validated on next vm use to avoid fault.
+ * */
+ amdgpu_vm_bo_evicted(base);
}
-/*
- * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
- */
-#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
- for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
- (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
- amdgpu_vm_pt_continue_dfs((start), (entry)); \
- (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
-
/**
- * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
+ * amdgpu_vm_lock_pd - lock PD in drm_exec
*
* @vm: vm providing the BOs
- * @validated: head of validation list
- * @entry: entry to add
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
*
- * Add the page directory to the list of BOs to
- * validate for command submission.
+ * Lock the VM root PD in the DRM execution context.
*/
-void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- struct list_head *validated,
- struct amdgpu_bo_list_entry *entry)
+int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
{
- entry->priority = 0;
- entry->tv.bo = &vm->root.base.bo->tbo;
- /* Two for VM updates, one for TTM and one for the CS job */
- entry->tv.num_shared = 4;
- entry->user_pages = NULL;
- list_add(&entry->tv.head, validated);
+ /* We need at least two fences for the VM PD/PT updates */
+ return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
+ 2 + num_fences);
}
/**
- * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
- *
- * @bo: BO which was removed from the LRU
+ * amdgpu_vm_lock_done_list - lock all BOs on the done list
+ * @vm: vm providing the BOs
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
*
- * Make sure the bulk_moveable flag is updated when a BO is removed from the
- * LRU.
+ * Lock the BOs on the done list in the DRM execution context.
*/
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
{
- struct amdgpu_bo *abo;
- struct amdgpu_vm_bo_base *bo_base;
+ struct list_head *prev = &vm->done;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
- if (!amdgpu_bo_is_amdgpu_bo(bo))
- return;
+ /* We can only trust prev->next while holding the lock */
+ spin_lock(&vm->status_lock);
+ while (!list_is_head(prev->next, &vm->done)) {
+ bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
- if (bo->pin_count)
- return;
+ bo = bo_va->base.bo;
+ if (bo) {
+ amdgpu_bo_ref(bo);
+ spin_unlock(&vm->status_lock);
- abo = ttm_to_amdgpu_bo(bo);
- if (!abo->parent)
- return;
- for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
- struct amdgpu_vm *vm = bo_base->vm;
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
+ amdgpu_bo_unref(&bo);
+ if (unlikely(ret))
+ return ret;
- if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
- vm->bulk_moveable = false;
+ spin_lock(&vm->status_lock);
+ }
+ prev = prev->next;
}
+ spin_unlock(&vm->status_lock);
+ return 0;
}
+
/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
@@ -635,60 +516,106 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- struct amdgpu_vm_bo_base *bo_base;
+ spin_lock(&adev->mman.bdev.lru_lock);
+ ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+ spin_unlock(&adev->mman.bdev.lru_lock);
+}
- if (vm->bulk_moveable) {
- spin_lock(&ttm_bo_glob.lru_lock);
- ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&ttm_bo_glob.lru_lock);
- return;
- }
+/* Create scheduler entities for page table updates */
+static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ int r;
- memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
+ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+ if (r)
+ goto error;
- spin_lock(&ttm_bo_glob.lru_lock);
- list_for_each_entry(bo_base, &vm->idle, vm_status) {
- struct amdgpu_bo *bo = bo_base->bo;
+ return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
- if (!bo->parent)
- continue;
+error:
+ drm_sched_entity_destroy(&vm->immediate);
+ return r;
+}
- ttm_bo_move_to_lru_tail(&bo->tbo, &bo->tbo.mem,
- &vm->lru_bulk_move);
- if (bo->shadow)
- ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
- &bo->shadow->tbo.mem,
- &vm->lru_bulk_move);
- }
- spin_unlock(&ttm_bo_glob.lru_lock);
+/* Destroy the entities for page table updates again */
+static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
+{
+ drm_sched_entity_destroy(&vm->immediate);
+ drm_sched_entity_destroy(&vm->delayed);
+}
+
+/**
+ * amdgpu_vm_generation - return the page table re-generation counter
+ * @adev: the amdgpu_device
+ * @vm: optional VM to check, might be NULL
+ *
+ * Returns a page table re-generation token to allow checking if submissions
+ * are still valid to use this VM. The VM parameter might be NULL in which case
+ * just the VRAM lost counter will be used.
+ */
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
+
+ if (!vm)
+ return result;
- vm->bulk_moveable = true;
+ result += lower_32_bits(vm->generation);
+ /* Add one if the page tables will be re-generated on next CS */
+ if (drm_sched_entity_error(&vm->delayed))
+ ++result;
+
+ return result;
}
/**
- * amdgpu_vm_validate_pt_bos - validate the page table BOs
+ * amdgpu_vm_validate - validate evicted BOs tracked in the VM
*
* @adev: amdgpu device pointer
* @vm: vm providing the BOs
+ * @ticket: optional reservation ticket used to reserve the VM
* @validate: callback to do the validation
* @param: parameter for the validation callback
*
- * Validate the page table BOs on command submission if neccessary.
+ * Validate the page table BOs and per-VM BOs on command submission if
+ * necessary. If a ticket is given, also try to validate evicted user queue
+ * BOs. They must already be reserved with the given ticket.
*
* Returns:
* Validation result.
*/
-int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int (*validate)(void *p, struct amdgpu_bo *bo),
- void *param)
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket,
+ int (*validate)(void *p, struct amdgpu_bo *bo),
+ void *param)
{
- struct amdgpu_vm_bo_base *bo_base, *tmp;
+ uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
+ struct amdgpu_vm_bo_base *bo_base;
+ struct amdgpu_bo *bo;
int r;
- vm->bulk_moveable &= list_empty(&vm->evicted);
+ if (vm->generation != new_vm_generation) {
+ vm->generation = new_vm_generation;
+ amdgpu_vm_bo_reset_state_machine(vm);
+ amdgpu_vm_fini_entities(vm);
+ r = amdgpu_vm_init_entities(adev, vm);
+ if (r)
+ return r;
+ }
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->evicted)) {
+ bo_base = list_first_entry(&vm->evicted,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
- list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
- struct amdgpu_bo *bo = bo_base->bo;
+ bo = bo_base->bo;
r = validate(param, bo);
if (r)
@@ -697,10 +624,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
} else {
- vm->update_funcs->map_table(bo);
+ vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
amdgpu_vm_bo_relocated(bo_base);
}
+ spin_lock(&vm->status_lock);
}
+ while (ticket && !list_empty(&vm->evicted_user)) {
+ bo_base = list_first_entry(&vm->evicted_user,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_base->bo;
+ dma_resv_assert_held(bo->tbo.base.resv);
+
+ r = validate(param, bo);
+ if (r)
+ return r;
+
+ amdgpu_vm_bo_invalidated(bo_base);
+
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
amdgpu_vm_eviction_lock(vm);
vm->evicting = false;
@@ -717,270 +663,31 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* Check if all VM PDs/PTs are ready for updates
*
* Returns:
- * True if eviction list is empty.
+ * True if VM is not evicting and all VM entities are not stopped
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
- return list_empty(&vm->evicted);
-}
-
-/**
- * amdgpu_vm_clear_bo - initially clear the PDs/PTs
- *
- * @adev: amdgpu_device pointer
- * @vm: VM to clear BO from
- * @bo: BO to clear
- * @immediate: use an immediate update
- *
- * Root PD needs to be reserved when calling this.
- *
- * Returns:
- * 0 on success, errno otherwise.
- */
-static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo *bo,
- bool immediate)
-{
- struct ttm_operation_ctx ctx = { true, false };
- unsigned level = adev->vm_manager.root_level;
- struct amdgpu_vm_update_params params;
- struct amdgpu_bo *ancestor = bo;
- unsigned entries, ats_entries;
- uint64_t addr;
- int r;
-
- /* Figure out our place in the hierarchy */
- if (ancestor->parent) {
- ++level;
- while (ancestor->parent->parent) {
- ++level;
- ancestor = ancestor->parent;
- }
- }
-
- entries = amdgpu_bo_size(bo) / 8;
- if (!vm->pte_support_ats) {
- ats_entries = 0;
-
- } else if (!bo->parent) {
- ats_entries = amdgpu_vm_num_ats_entries(adev);
- ats_entries = min(ats_entries, entries);
- entries -= ats_entries;
-
- } else {
- struct amdgpu_vm_pt *pt;
-
- pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt, base);
- ats_entries = amdgpu_vm_num_ats_entries(adev);
- if ((pt - vm->root.entries) >= ats_entries) {
- ats_entries = 0;
- } else {
- ats_entries = entries;
- entries = 0;
- }
- }
-
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- return r;
-
- if (bo->shadow) {
- r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
- &ctx);
- if (r)
- return r;
- }
-
- r = vm->update_funcs->map_table(bo);
- if (r)
- return r;
-
- memset(&params, 0, sizeof(params));
- params.adev = adev;
- params.vm = vm;
- params.immediate = immediate;
-
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
- if (r)
- return r;
-
- addr = 0;
- if (ats_entries) {
- uint64_t value = 0, flags;
-
- flags = AMDGPU_PTE_DEFAULT_ATC;
- if (level != AMDGPU_VM_PTB) {
- /* Handle leaf PDEs as PTEs */
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
- }
-
- r = vm->update_funcs->update(&params, bo, addr, 0, ats_entries,
- value, flags);
- if (r)
- return r;
-
- addr += ats_entries * 8;
- }
-
- if (entries) {
- uint64_t value = 0, flags = 0;
-
- if (adev->asic_type >= CHIP_VEGA10) {
- if (level != AMDGPU_VM_PTB) {
- /* Handle leaf PDEs as PTEs */
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(adev, level,
- &value, &flags);
- } else {
- /* Workaround for fault priority problem on GMC9 */
- flags = AMDGPU_PTE_EXECUTABLE;
- }
- }
-
- r = vm->update_funcs->update(&params, bo, addr, 0, entries,
- value, flags);
- if (r)
- return r;
- }
-
- return vm->update_funcs->commit(&params, NULL);
-}
-
-/**
- * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
- *
- * @adev: amdgpu_device pointer
- * @vm: requesting vm
- * @level: the page table level
- * @immediate: use a immediate update
- * @bp: resulting BO allocation parameters
- */
-static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int level, bool immediate,
- struct amdgpu_bo_param *bp)
-{
- memset(bp, 0, sizeof(*bp));
-
- bp->size = amdgpu_vm_bo_size(adev, level);
- bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
- bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
- bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- if (vm->use_cpu_for_update)
- bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- else if (!vm->root.base.bo || vm->root.base.bo->shadow)
- bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
- bp->type = ttm_bo_type_kernel;
- bp->no_wait_gpu = immediate;
- if (vm->root.base.bo)
- bp->resv = vm->root.base.bo->tbo.base.resv;
-}
-
-/**
- * amdgpu_vm_alloc_pts - Allocate a specific page table
- *
- * @adev: amdgpu_device pointer
- * @vm: VM to allocate page tables for
- * @cursor: Which page table to allocate
- * @immediate: use an immediate update
- *
- * Make sure a specific page table or directory is allocated.
- *
- * Returns:
- * 1 if page table needed to be allocated, 0 if page table was already
- * allocated, negative errno if an error occurred.
- */
-static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *cursor,
- bool immediate)
-{
- struct amdgpu_vm_pt *entry = cursor->entry;
- struct amdgpu_bo_param bp;
- struct amdgpu_bo *pt;
- int r;
-
- if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
- unsigned num_entries;
-
- num_entries = amdgpu_vm_num_entries(adev, cursor->level);
- entry->entries = kvmalloc_array(num_entries,
- sizeof(*entry->entries),
- GFP_KERNEL | __GFP_ZERO);
- if (!entry->entries)
- return -ENOMEM;
- }
-
- if (entry->base.bo)
- return 0;
-
- amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
-
- r = amdgpu_bo_create(adev, &bp, &pt);
- if (r)
- return r;
-
- /* Keep a reference to the root directory to avoid
- * freeing them up in the wrong order.
- */
- pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
- amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+ bool ret;
- r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
- if (r)
- goto error_free_pt;
-
- return 0;
+ amdgpu_vm_assert_locked(vm);
-error_free_pt:
- amdgpu_bo_unref(&pt->shadow);
- amdgpu_bo_unref(&pt);
- return r;
-}
-
-/**
- * amdgpu_vm_free_table - fre one PD/PT
- *
- * @entry: PDE to free
- */
-static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
-{
- if (entry->base.bo) {
- entry->base.bo->vm_bo = NULL;
- list_del(&entry->base.vm_status);
- amdgpu_bo_unref(&entry->base.bo->shadow);
- amdgpu_bo_unref(&entry->base.bo);
- }
- kvfree(entry->entries);
- entry->entries = NULL;
-}
+ amdgpu_vm_eviction_lock(vm);
+ ret = !vm->evicting;
+ amdgpu_vm_eviction_unlock(vm);
-/**
- * amdgpu_vm_free_pts - free PD/PT levels
- *
- * @adev: amdgpu device structure
- * @vm: amdgpu vm structure
- * @start: optional cursor where to start freeing PDs/PTs
- *
- * Free the page directory or page table level and all sub levels.
- */
-static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start)
-{
- struct amdgpu_vm_pt_cursor cursor;
- struct amdgpu_vm_pt *entry;
+ spin_lock(&vm->status_lock);
+ ret &= list_empty(&vm->evicted);
+ spin_unlock(&vm->status_lock);
- vm->bulk_moveable = false;
+ spin_lock(&vm->immediate.lock);
+ ret &= !vm->immediate.stopped;
+ spin_unlock(&vm->immediate.lock);
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
- amdgpu_vm_free_table(entry);
+ spin_lock(&vm->delayed.lock);
+ ret &= !vm->delayed.stopped;
+ spin_unlock(&vm->delayed.lock);
- if (start)
- amdgpu_vm_free_table(start->entry);
+ return ret;
}
/**
@@ -1031,27 +738,22 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- struct amdgpu_vmid *id;
- bool gds_switch_needed;
- bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
if (job->vmid == 0)
return false;
- id = &id_mgr->ids[job->vmid];
- gds_switch_needed = ring->funcs->emit_gds_switch && (
- id->gds_base != job->gds_base ||
- id->gds_size != job->gds_size ||
- id->gws_base != job->gws_base ||
- id->gws_size != job->gws_size ||
- id->oa_base != job->oa_base ||
- id->oa_size != job->oa_size);
-
- if (amdgpu_vmid_had_gpu_reset(adev, id))
+
+ if (job->vm_needs_flush || ring->has_compute_vm_bug)
return true;
- return vm_flush_needed || gds_switch_needed;
+ if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+ return true;
+
+ if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
+ return true;
+
+ return false;
}
/**
@@ -1070,30 +772,25 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
- bool gds_switch_needed = ring->funcs->emit_gds_switch && (
- id->gds_base != job->gds_base ||
- id->gds_size != job->gds_size ||
- id->gws_base != job->gws_base ||
- id->gws_size != job->gws_size ||
- id->oa_base != job->oa_base ||
- id->oa_size != job->oa_size);
+ bool spm_update_needed = job->spm_update_needed;
+ bool gds_switch_needed = ring->funcs->emit_gds_switch &&
+ job->gds_switch_needed;
bool vm_flush_needed = job->vm_needs_flush;
- struct dma_fence *fence = NULL;
+ bool cleaner_shader_needed = false;
bool pasid_mapping_needed = false;
- unsigned patch_offset = 0;
- bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
+ struct dma_fence *fence = NULL;
+ unsigned int patch;
int r;
- if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
-
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
gds_switch_needed = true;
vm_flush_needed = true;
pasid_mapping_needed = true;
+ spm_update_needed = true;
}
mutex_lock(&id_mgr->lock);
@@ -1108,15 +805,26 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
- if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+ cleaner_shader_needed = job->run_cleaner_shader &&
+ adev->gfx.enable_cleaner_shader &&
+ ring->funcs->emit_cleaner_shader && job->base.s_fence &&
+ &job->base.s_fence->scheduled == isolation->spearhead;
+
+ if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
+ !cleaner_shader_needed)
return 0;
+ amdgpu_ring_ib_begin(ring);
if (ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
+ patch = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
+ if (cleaner_shader_needed)
+ ring->funcs->emit_cleaner_shader(ring);
+
if (vm_flush_needed) {
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
@@ -1125,10 +833,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (pasid_mapping_needed)
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
- if (vm_flush_needed || pasid_mapping_needed) {
- r = amdgpu_fence_emit(ring, &fence, 0);
+ if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
+
+ if (ring->funcs->emit_gds_switch &&
+ gds_switch_needed) {
+ amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+ job->gds_size, job->gws_base,
+ job->gws_size, job->oa_base,
+ job->oa_size);
+ }
+
+ if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
+ r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
if (r)
return r;
+ fence = &job->hw_vm_fence->base;
+ /* get a ref for the job */
+ dma_fence_get(fence);
}
if (vm_flush_needed) {
@@ -1147,29 +869,29 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
id->pasid_mapping = dma_fence_get(fence);
mutex_unlock(&id_mgr->lock);
}
- dma_fence_put(fence);
- if (ring->funcs->emit_gds_switch && gds_switch_needed) {
- id->gds_base = job->gds_base;
- id->gds_size = job->gds_size;
- id->gws_base = job->gws_base;
- id->gws_size = job->gws_size;
- id->oa_base = job->oa_base;
- id->oa_size = job->oa_size;
- amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
- job->gds_size, job->gws_base,
- job->gws_size, job->oa_base,
- job->oa_size);
+ /*
+ * Make sure that all other submissions wait for the cleaner shader to
+ * finish before we push them to the HW.
+ */
+ if (cleaner_shader_needed) {
+ trace_amdgpu_cleaner_shader(ring, fence);
+ mutex_lock(&adev->enforce_isolation_mutex);
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(fence);
+ mutex_unlock(&adev->enforce_isolation_mutex);
}
+ dma_fence_put(fence);
- if (ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ amdgpu_ring_patch_cond_exec(ring, patch);
/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
if (ring->funcs->emit_switch_buffer) {
amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_emit_switch_buffer(ring);
}
+
+ amdgpu_ring_ib_end(ring);
return 0;
}
@@ -1230,52 +952,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
}
/**
- * amdgpu_vm_update_pde - update a single level in the hierarchy
- *
- * @params: parameters for the update
- * @vm: requested vm
- * @entry: entry to update
- *
- * Makes sure the requested entry in parent is up to date.
- */
-static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt *entry)
-{
- struct amdgpu_vm_pt *parent = amdgpu_vm_pt_parent(entry);
- struct amdgpu_bo *bo = parent->base.bo, *pbo;
- uint64_t pde, pt, flags;
- unsigned level;
-
- for (level = 0, pbo = bo->parent; pbo; ++level)
- pbo = pbo->parent;
-
- level += params->adev->vm_manager.root_level;
- amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags);
- pde = (entry - parent->entries) * 8;
- return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags);
-}
-
-/**
- * amdgpu_vm_invalidate_pds - mark all PDs as invalid
- *
- * @adev: amdgpu_device pointer
- * @vm: related vm
- *
- * Mark all PD level as invalid after an error.
- */
-static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
-{
- struct amdgpu_vm_pt_cursor cursor;
- struct amdgpu_vm_pt *entry;
-
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
- if (entry->base.bo && !entry->base.moved)
- amdgpu_vm_bo_relocated(&entry->base);
-}
-
-/**
* amdgpu_vm_update_pdes - make sure that all directories are valid
*
* @adev: amdgpu_device pointer
@@ -1291,28 +967,38 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
- int r;
+ struct amdgpu_vm_bo_base *entry;
+ bool flush_tlb_needed = false;
+ LIST_HEAD(relocated);
+ int r, idx;
+
+ amdgpu_vm_assert_locked(vm);
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->relocated, &relocated);
+ spin_unlock(&vm->status_lock);
- if (list_empty(&vm->relocated))
+ if (list_empty(&relocated))
return 0;
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
params.immediate = immediate;
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ r = vm->update_funcs->prepare(&params, NULL,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES);
if (r)
- return r;
+ goto error;
- while (!list_empty(&vm->relocated)) {
- struct amdgpu_vm_pt *entry;
+ list_for_each_entry(entry, &relocated, vm_status) {
+ /* vm_flush_needed after updating moved PDEs */
+ flush_tlb_needed |= entry->moved;
- entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt,
- base.vm_status);
- amdgpu_vm_bo_idle(&entry->base);
-
- r = amdgpu_vm_update_pde(&params, vm, entry);
+ r = amdgpu_vm_pde_update(&params, entry);
if (r)
goto error;
}
@@ -1320,291 +1006,134 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->commit(&params, &vm->last_update);
if (r)
goto error;
- return 0;
+
+ if (flush_tlb_needed)
+ atomic64_inc(&vm->tlb_seq);
+
+ while (!list_empty(&relocated)) {
+ entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
+ vm_status);
+ amdgpu_vm_bo_idle(entry);
+ }
error:
- amdgpu_vm_invalidate_pds(adev, vm);
+ drm_dev_exit(idx);
return r;
}
-/*
- * amdgpu_vm_update_flags - figure out flags for PTE updates
+/**
+ * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
+ * @fence: unused
+ * @cb: the callback structure
*
- * Make sure to set the right flags for the PTEs at the desired level.
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
*/
-static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params,
- struct amdgpu_bo *bo, unsigned level,
- uint64_t pe, uint64_t addr,
- unsigned count, uint32_t incr,
- uint64_t flags)
-
+static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
+ struct dma_fence_cb *cb)
{
- if (level != AMDGPU_VM_PTB) {
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
-
- } else if (params->adev->asic_type >= CHIP_VEGA10 &&
- !(flags & AMDGPU_PTE_VALID) &&
- !(flags & AMDGPU_PTE_PRT)) {
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb;
- /* Workaround for fault priority problem on GMC9 */
- flags |= AMDGPU_PTE_EXECUTABLE;
- }
-
- params->vm->update_funcs->update(params, bo, pe, addr, count, incr,
- flags);
+ tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
+ atomic64_inc(&tlb_cb->vm->tlb_seq);
+ kfree(tlb_cb);
}
/**
- * amdgpu_vm_fragment - get fragment for PTEs
+ * amdgpu_vm_tlb_flush - prepare TLB flush
*
- * @params: see amdgpu_vm_update_params definition
- * @start: first PTE to handle
- * @end: last PTE to handle
- * @flags: hw mapping flags
- * @frag: resulting fragment size
- * @frag_end: end of this fragment
+ * @params: parameters for update
+ * @fence: input fence to sync TLB flush with
+ * @tlb_cb: the callback structure
*
- * Returns the first possible fragment for the start and end address.
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
*/
-static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params,
- uint64_t start, uint64_t end, uint64_t flags,
- unsigned int *frag, uint64_t *frag_end)
+static void
+amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
+ struct dma_fence **fence,
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb)
{
- /**
- * The MC L1 TLB supports variable sized pages, based on a fragment
- * field in the PTE. When this field is set to a non-zero value, page
- * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
- * flags are considered valid for all PTEs within the fragment range
- * and corresponding mappings are assumed to be physically contiguous.
- *
- * The L1 TLB can store a single PTE for the whole fragment,
- * significantly increasing the space available for translation
- * caching. This leads to large improvements in throughput when the
- * TLB is under pressure.
- *
- * The L2 TLB distributes small and large fragments into two
- * asymmetric partitions. The large fragment cache is significantly
- * larger. Thus, we try to use large fragments wherever possible.
- * Userspace can support this by aligning virtual base address and
- * allocation size to the fragment size.
- *
- * Starting with Vega10 the fragment size only controls the L1. The L2
- * is now directly feed with small/huge/giant pages from the walker.
- */
- unsigned max_frag;
+ struct amdgpu_vm *vm = params->vm;
- if (params->adev->asic_type < CHIP_VEGA10)
- max_frag = params->adev->vm_manager.fragment_size;
- else
- max_frag = 31;
-
- /* system pages are non continuously */
- if (params->pages_addr) {
- *frag = 0;
- *frag_end = end;
+ tlb_cb->vm = vm;
+ if (!fence || !*fence) {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
return;
}
- /* This intentionally wraps around if no bit is set */
- *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
- if (*frag >= max_frag) {
- *frag = max_frag;
- *frag_end = end & ~((1ULL << max_frag) - 1);
+ if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
+ amdgpu_vm_tlb_seq_cb)) {
+ dma_fence_put(vm->last_tlb_flush);
+ vm->last_tlb_flush = dma_fence_get(*fence);
} else {
- *frag_end = start + (1 << *frag);
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
}
-}
-/**
- * amdgpu_vm_update_ptes - make sure that page tables are valid
- *
- * @params: see amdgpu_vm_update_params definition
- * @start: start of GPU address range
- * @end: end of GPU address range
- * @dst: destination address to map to, the next dst inside the function
- * @flags: mapping flags
- *
- * Update the page tables in the range @start - @end.
- *
- * Returns:
- * 0 for success, -EINVAL for failure.
- */
-static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
- uint64_t start, uint64_t end,
- uint64_t dst, uint64_t flags)
-{
- struct amdgpu_device *adev = params->adev;
- struct amdgpu_vm_pt_cursor cursor;
- uint64_t frag_start = start, frag_end;
- unsigned int frag;
- int r;
-
- /* figure out the initial fragment */
- amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
-
- /* walk over the address space and update the PTs */
- amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
- while (cursor.pfn < end) {
- unsigned shift, parent_shift, mask;
- uint64_t incr, entry_end, pe_start;
- struct amdgpu_bo *pt;
-
- if (!params->unlocked) {
- /* make sure that the page tables covering the
- * address range are actually allocated
- */
- r = amdgpu_vm_alloc_pts(params->adev, params->vm,
- &cursor, params->immediate);
- if (r)
- return r;
- }
-
- shift = amdgpu_vm_level_shift(adev, cursor.level);
- parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
- if (params->unlocked) {
- /* Unlocked updates are only allowed on the leaves */
- if (amdgpu_vm_pt_descendant(adev, &cursor))
- continue;
- } else if (adev->asic_type < CHIP_VEGA10 &&
- (flags & AMDGPU_PTE_VALID)) {
- /* No huge page support before GMC v9 */
- if (cursor.level != AMDGPU_VM_PTB) {
- if (!amdgpu_vm_pt_descendant(adev, &cursor))
- return -ENOENT;
- continue;
- }
- } else if (frag < shift) {
- /* We can't use this level when the fragment size is
- * smaller than the address shift. Go to the next
- * child entry and try again.
- */
- if (amdgpu_vm_pt_descendant(adev, &cursor))
- continue;
- } else if (frag >= parent_shift) {
- /* If the fragment size is even larger than the parent
- * shift we should go up one level and check it again.
- */
- if (!amdgpu_vm_pt_ancestor(&cursor))
- return -EINVAL;
- continue;
- }
-
- pt = cursor.entry->base.bo;
- if (!pt) {
- /* We need all PDs and PTs for mapping something, */
- if (flags & AMDGPU_PTE_VALID)
- return -ENOENT;
-
- /* but unmapping something can happen at a higher
- * level.
- */
- if (!amdgpu_vm_pt_ancestor(&cursor))
- return -EINVAL;
-
- pt = cursor.entry->base.bo;
- shift = parent_shift;
- frag_end = max(frag_end, ALIGN(frag_start + 1,
- 1ULL << shift));
- }
+ /* Prepare a TLB flush fence to be attached to PTs */
+ if (!params->unlocked) {
+ amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
- /* Looks good so far, calculate parameters for the update */
- incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
- mask = amdgpu_vm_entries_mask(adev, cursor.level);
- pe_start = ((cursor.pfn >> shift) & mask) * 8;
- entry_end = ((uint64_t)mask + 1) << shift;
- entry_end += cursor.pfn & ~(entry_end - 1);
- entry_end = min(entry_end, end);
-
- do {
- struct amdgpu_vm *vm = params->vm;
- uint64_t upd_end = min(entry_end, frag_end);
- unsigned nptes = (upd_end - frag_start) >> shift;
- uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
-
- /* This can happen when we set higher level PDs to
- * silent to stop fault floods.
- */
- nptes = max(nptes, 1u);
-
- trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
- nptes, dst, incr, upd_flags,
- vm->task_info.pid,
- vm->immediate.fence_context);
- amdgpu_vm_update_flags(params, pt, cursor.level,
- pe_start, dst, nptes, incr,
- upd_flags);
-
- pe_start += nptes * 8;
- dst += nptes * incr;
-
- frag_start = upd_end;
- if (frag_start >= frag_end) {
- /* figure out the next fragment */
- amdgpu_vm_fragment(params, frag_start, end,
- flags, &frag, &frag_end);
- if (frag < shift)
- break;
- }
- } while (frag_start < entry_end);
-
- if (amdgpu_vm_pt_descendant(adev, &cursor)) {
- /* Free all child entries.
- * Update the tables with the flags and addresses and free up subsequent
- * tables in the case of huge pages or freed up areas.
- * This is the maximum you can free, because all other page tables are not
- * completely covered by the range and so potentially still in use.
- */
- while (cursor.pfn < frag_start) {
- amdgpu_vm_free_pts(adev, params->vm, &cursor);
- amdgpu_vm_pt_next(adev, &cursor);
- }
-
- } else if (frag >= shift) {
- /* or just move on to the next on the same level. */
- amdgpu_vm_pt_next(adev, &cursor);
- }
+ /* Makes sure no PD/PT is freed before the flush */
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+ DMA_RESV_USAGE_BOOKKEEP);
}
-
- return 0;
}
/**
- * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
+ * amdgpu_vm_update_range - update a range in the vm page table
*
- * @adev: amdgpu_device pointer of the VM
- * @bo_adev: amdgpu_device pointer of the mapped BO
- * @vm: requested vm
+ * @adev: amdgpu_device pointer to use for commands
+ * @vm: the VM to update the range
* @immediate: immediate submission in a page fault
* @unlocked: unlocked invalidation during MM callback
- * @resv: fences we need to sync to
+ * @flush_tlb: trigger tlb invalidation after update completed
+ * @allow_override: change MTYPE for local NUMA nodes
+ * @sync: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
* @offset: offset into nodes and pages_addr
- * @nodes: array of drm_mm_nodes with the MC addresses
+ * @vram_base: base for vram mappings
+ * @res: ttm_resource to map
* @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
*
* Returns:
- * 0 for success, -EINVAL for failure.
+ * 0 for success, negative erro code for failure.
*/
-static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev,
- struct amdgpu_vm *vm, bool immediate,
- bool unlocked, struct dma_resv *resv,
- uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset,
- struct drm_mm_node *nodes,
- dma_addr_t *pages_addr,
- struct dma_fence **fence)
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence)
{
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb;
struct amdgpu_vm_update_params params;
- enum amdgpu_sync_mode sync_mode;
- uint64_t pfn;
- int r;
+ struct amdgpu_res_cursor cursor;
+ int r, idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
+ if (!tlb_cb) {
+ drm_dev_exit(idx);
+ return -ENOMEM;
+ }
+
+ /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
+ * heavy-weight flush TLB unconditionally.
+ */
+ flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0);
+
+ /*
+ * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
+ */
+ flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0);
memset(&params, 0, sizeof(params));
params.adev = adev;
@@ -1612,58 +1141,40 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
-
- /* Implicitly sync to command submissions in the same VM before
- * unmapping. Sync to moving fences before mapping.
- */
- if (!(flags & AMDGPU_PTE_VALID))
- sync_mode = AMDGPU_SYNC_EQ_OWNER;
- else
- sync_mode = AMDGPU_SYNC_EXPLICIT;
-
- pfn = offset >> PAGE_SHIFT;
- if (nodes) {
- while (pfn >= nodes->size) {
- pfn -= nodes->size;
- ++nodes;
- }
- }
+ params.needs_flush = flush_tlb;
+ params.allow_override = allow_override;
+ INIT_LIST_HEAD(&params.tlb_flush_waitlist);
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
- goto error_unlock;
+ goto error_free;
}
if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
struct dma_fence *tmp = dma_fence_get_stub();
- amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
+ amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true);
swap(vm->last_unlocked, tmp);
dma_fence_put(tmp);
}
- r = vm->update_funcs->prepare(&params, resv, sync_mode);
+ r = vm->update_funcs->prepare(&params, sync,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE);
if (r)
- goto error_unlock;
+ goto error_free;
- do {
+ amdgpu_res_first(pages_addr ? NULL : res, offset,
+ (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
+ while (cursor.remaining) {
uint64_t tmp, num_entries, addr;
-
- num_entries = last - start + 1;
- if (nodes) {
- addr = nodes->start << PAGE_SHIFT;
- num_entries = min((nodes->size - pfn) *
- AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
- } else {
- addr = 0;
- }
-
+ num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
if (pages_addr) {
bool contiguous = true;
if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+ uint64_t pfn = cursor.start >> PAGE_SHIFT;
uint64_t count;
contiguous = pages_addr[pfn + 1] ==
@@ -1678,44 +1189,61 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
pages_addr[idx - 1] + PAGE_SIZE))
break;
}
+ if (!contiguous)
+ count--;
num_entries = count *
AMDGPU_GPU_PAGES_IN_CPU_PAGE;
}
if (!contiguous) {
- addr = pfn << PAGE_SHIFT;
+ addr = cursor.start;
params.pages_addr = pages_addr;
} else {
- addr = pages_addr[pfn];
+ addr = pages_addr[cursor.start >> PAGE_SHIFT];
params.pages_addr = NULL;
}
- } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
- addr += bo_adev->vm_manager.vram_base_offset;
- addr += pfn << PAGE_SHIFT;
+ } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) {
+ addr = vram_base + cursor.start;
+ } else {
+ addr = 0;
}
tmp = start + num_entries;
- r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags);
+ r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
if (r)
- goto error_unlock;
+ goto error_free;
- pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- if (nodes && nodes->size == pfn) {
- pfn = 0;
- ++nodes;
- }
+ amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
start = tmp;
-
- } while (unlikely(start != last + 1));
+ }
r = vm->update_funcs->commit(&params, fence);
+ if (r)
+ goto error_free;
-error_unlock:
+ if (params.needs_flush) {
+ amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
+ tlb_cb = NULL;
+ }
+
+ amdgpu_vm_pt_free_list(adev, &params);
+
+error_free:
+ kfree(tlb_cb);
amdgpu_vm_eviction_unlock(vm);
+ drm_dev_exit(idx);
return r;
}
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
+{
+ spin_lock(&vm->status_lock);
+ memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
+ spin_unlock(&vm->status_lock);
+}
+
/**
* amdgpu_vm_bo_update - update all BO mappings in the vm page table
*
@@ -1734,56 +1262,85 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_va_mapping *mapping;
+ struct dma_fence **last_update;
dma_addr_t *pages_addr = NULL;
struct ttm_resource *mem;
- struct drm_mm_node *nodes;
- struct dma_fence **last_update;
- struct dma_resv *resv;
+ struct amdgpu_sync sync;
+ bool flush_tlb = clear;
+ uint64_t vram_base;
uint64_t flags;
- struct amdgpu_device *bo_adev = adev;
+ bool uncached;
int r;
- if (clear || !bo) {
+ amdgpu_sync_create(&sync);
+ if (clear) {
+ mem = NULL;
+
+ /* Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+ if (bo) {
+ r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
+ if (r)
+ goto error_free;
+ }
+ } else if (!bo) {
mem = NULL;
- nodes = NULL;
- resv = vm->root.base.bo->tbo.base.resv;
+
+ /* PRT map operations don't need to sync to anything. */
+
} else {
struct drm_gem_object *obj = &bo->tbo.base;
- resv = bo->tbo.base.resv;
- if (obj->import_attach && bo_va->is_xgmi) {
+ if (drm_gem_is_imported(obj) && bo_va->is_xgmi) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
struct drm_gem_object *gobj = dma_buf->priv;
struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
- if (abo->tbo.mem.mem_type == TTM_PL_VRAM)
+ if (abo->tbo.resource &&
+ abo->tbo.resource->mem_type == TTM_PL_VRAM)
bo = gem_to_amdgpu_bo(gobj);
}
- mem = &bo->tbo.mem;
- nodes = mem->mm_node;
- if (mem->mem_type == TTM_PL_TT)
+ mem = bo->tbo.resource;
+ if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_PREEMPT))
pages_addr = bo->tbo.ttm->dma_address;
+
+ /* Implicitly sync to moving fences before mapping anything */
+ r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
+ AMDGPU_SYNC_EXPLICIT, vm);
+ if (r)
+ goto error_free;
}
if (bo) {
+ struct amdgpu_device *bo_adev;
+
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
if (amdgpu_bo_encrypted(bo))
flags |= AMDGPU_PTE_TMZ;
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ vram_base = bo_adev->vm_manager.vram_base_offset;
+ uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
} else {
flags = 0x0;
+ vram_base = 0;
+ uncached = false;
}
- if (clear || (bo && bo->tbo.base.resv ==
- vm->root.base.bo->tbo.base.resv))
+ if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
last_update = &vm->last_update;
else
last_update = &bo_va->last_pt_update;
if (!clear && bo_va->base.moved) {
- bo_va->base.moved = false;
+ flush_tlb = true;
list_splice_init(&bo_va->valids, &bo_va->invalids);
} else if (bo_va->cleared != clear) {
@@ -1796,34 +1353,34 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
* but in case of something, we filter the flags in first place
*/
- if (!(mapping->flags & AMDGPU_PTE_READABLE))
+ if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
update_flags &= ~AMDGPU_PTE_READABLE;
- if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+ if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
update_flags &= ~AMDGPU_PTE_WRITEABLE;
/* Apply ASIC specific mapping flags */
- amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+ amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
+ &update_flags);
trace_amdgpu_vm_bo_update(mapping);
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
- resv, mapping->start,
- mapping->last, update_flags,
- mapping->offset, nodes,
- pages_addr, last_update);
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
+ !uncached, &sync, mapping->start,
+ mapping->last, update_flags,
+ mapping->offset, vram_base, mem,
+ pages_addr, last_update);
if (r)
- return r;
+ goto error_free;
}
/* If the BO is not in its preferred location add it back to
* the evicted list so that it gets validated again on the
* next command submission.
*/
- if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
- uint32_t mem_type = bo->tbo.mem.mem_type;
-
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
+ if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
+ if (bo->tbo.resource &&
+ !(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
@@ -1833,13 +1390,16 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
+ bo_va->base.moved = false;
if (trace_amdgpu_vm_bo_mapping_enabled()) {
list_for_each_entry(mapping, &bo_va->valids, list)
trace_amdgpu_vm_bo_mapping(mapping);
}
- return 0;
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
}
/**
@@ -1941,7 +1501,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
struct dma_fence *fence)
{
- if (mapping->flags & AMDGPU_PTE_PRT)
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_add_prt_cb(adev, fence);
kfree(mapping);
}
@@ -1956,32 +1516,15 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
*/
static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
- struct dma_fence *excl, **shared;
- unsigned i, shared_count;
- int r;
+ struct dma_resv *resv = vm->root.bo->tbo.base.resv;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
- r = dma_resv_get_fences_rcu(resv, &excl,
- &shared_count, &shared);
- if (r) {
- /* Not enough memory to grab the fence list, as last resort
- * block for all the fences to complete.
- */
- dma_resv_wait_timeout_rcu(resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
- return;
- }
-
- /* Add a callback for each fence in the reservation object */
- amdgpu_vm_prt_get(adev);
- amdgpu_vm_add_prt_cb(adev, excl);
-
- for (i = 0; i < shared_count; ++i) {
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ /* Add a callback for each fence in the reservation object */
amdgpu_vm_prt_get(adev);
- amdgpu_vm_add_prt_cb(adev, shared[i]);
+ amdgpu_vm_add_prt_cb(adev, fence);
}
-
- kfree(shared);
}
/**
@@ -2003,29 +1546,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence)
{
- struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
struct amdgpu_bo_va_mapping *mapping;
- uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
+ struct amdgpu_sync sync;
int r;
+
+ /*
+ * Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ amdgpu_sync_create(&sync);
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
- if (vm->pte_support_ats &&
- mapping->start < AMDGPU_GMC_HOLE_START)
- init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
-
- r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
- resv, mapping->start,
- mapping->last, init_pte_value,
- 0, NULL, NULL, &f);
+ r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
+ &sync, mapping->start, mapping->last,
+ 0, 0, 0, NULL, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
- return r;
+ goto error_free;
}
}
@@ -2036,7 +1584,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
dma_fence_put(f);
}
- return 0;
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
}
@@ -2045,6 +1595,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @ticket: optional reservation ticket used to reserve the VM
*
* Make sure all BOs which are moved are updated in the PTs.
*
@@ -2054,48 +1605,116 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
* PTs have to be reserved!
*/
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
{
- struct amdgpu_bo_va *bo_va, *tmp;
+ struct amdgpu_bo_va *bo_va;
struct dma_resv *resv;
- bool clear;
+ bool clear, unlock;
int r;
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
/* Per VM BOs never need to bo cleared in the page tables */
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
+ spin_lock(&vm->status_lock);
}
- spin_lock(&vm->invalidated_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
resv = bo_va->base.bo->tbo.base.resv;
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
- if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+ if (!adev->debug_vm && dma_resv_trylock(resv)) {
+ clear = false;
+ unlock = true;
+ /* The caller is already holding the reservation lock */
+ } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
clear = false;
+ unlock = false;
/* Somebody else is using the BO right now */
- else
+ } else {
clear = true;
+ unlock = false;
+ }
r = amdgpu_vm_bo_update(adev, bo_va, clear);
+
+ if (unlock)
+ dma_resv_unlock(resv);
if (r)
return r;
- if (!clear)
- dma_resv_unlock(resv);
- spin_lock(&vm->invalidated_lock);
+ /* Remember evicted DMABuf imports in compute VMs for later
+ * validation
+ */
+ if (vm->is_compute_context &&
+ drm_gem_is_imported(&bo_va->base.bo->tbo.base) &&
+ (!bo_va->base.bo->tbo.resource ||
+ bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
+ amdgpu_vm_bo_evicted_user(&bo_va->base);
+
+ spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
return 0;
}
/**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask)
+{
+ uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+ bool all_hub = false;
+ int xcc = 0, r = 0;
+
+ WARN_ON_ONCE(!vm->is_compute_context);
+
+ /*
+ * It can be that we race and lose here, but that is extremely unlikely
+ * and the worst thing which could happen is that we flush the changes
+ * into the TLB once more which is harmless.
+ */
+ if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+ return 0;
+
+ if (adev->family == AMDGPU_FAMILY_AI ||
+ adev->family == AMDGPU_FAMILY_RV)
+ all_hub = true;
+
+ for_each_inst(xcc, xcc_mask) {
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+ all_hub, xcc);
+ if (r)
+ break;
+ }
+ return r;
+}
+
+/**
* amdgpu_vm_bo_add - add a bo to a specific vm
*
* @adev: amdgpu_device pointer
@@ -2123,12 +1742,14 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
bo_va->ref_count = 1;
+ bo_va->last_pt_update = dma_fence_get_stub();
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
if (!bo)
return bo_va;
+ dma_resv_assert_held(bo->tbo.base.resv);
if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
bo_va->is_xgmi = true;
/* Power up XGMI if it can be potentially used */
@@ -2159,16 +1780,46 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
list_add(&mapping->list, &bo_va->invalids);
amdgpu_vm_it_insert(mapping, &vm->va);
- if (mapping->flags & AMDGPU_PTE_PRT)
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
- if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv &&
- !bo_va->base.moved) {
- list_move(&bo_va->base.vm_status, &vm->moved);
- }
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
+ amdgpu_vm_bo_moved(&bo_va->base);
+
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ uint64_t saddr,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t tmp, lpfn;
+
+ if (saddr & AMDGPU_GPU_PAGE_MASK
+ || offset & AMDGPU_GPU_PAGE_MASK
+ || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
+
+ if (check_add_overflow(saddr, size, &tmp)
+ || check_add_overflow(offset, size, &tmp)
+ || size == 0 /* which also leads to end < begin */)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ if (bo && offset + size > amdgpu_bo_size(bo))
+ return -EINVAL;
+
+ /* Ensure last pfn not exceed max_pfn */
+ lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+ if (lpfn >= adev->vm_manager.max_pfn)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* amdgpu_vm_bo_map - map bo inside a vm
*
@@ -2189,27 +1840,20 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t saddr, uint64_t offset,
- uint64_t size, uint64_t flags)
+ uint64_t size, uint32_t flags)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
+ int r;
- /* validate the parameters */
- if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
- size == 0 || size & AMDGPU_GPU_PAGE_MASK)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if (saddr >= eaddr ||
- (bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
if (tmp) {
@@ -2255,24 +1899,16 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t saddr, uint64_t offset,
- uint64_t size, uint64_t flags)
+ uint64_t size, uint32_t flags)
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo = bo_va->base.bo;
uint64_t eaddr;
int r;
- /* validate the parameters */
- if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
- size == 0 || size & AMDGPU_GPU_PAGE_MASK)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if (saddr >= eaddr ||
- (bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
/* Allocate all the needed memory */
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
@@ -2286,7 +1922,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
}
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
mapping->start = saddr;
mapping->last = eaddr;
@@ -2319,6 +1955,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_vm *vm = bo_va->base.vm;
bool valid = true;
+ int r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2339,6 +1976,17 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
return -ENOENT;
}
+ /* It's unlikely to happen that the mapping userq hasn't been idled
+ * during user requests GEM unmap IOCTL except for forcing the unmap
+ * from user space.
+ */
+ if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) {
+ r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
+ if (unlikely(r == -EBUSY))
+ dev_warn_once(adev->dev,
+ "Attempt to unmap an active userq buffer\n");
+ }
+
list_del(&mapping->list);
amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
@@ -2373,10 +2021,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
LIST_HEAD(removed);
uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+ if (r)
+ return r;
- eaddr = saddr + size - 1;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
/* Allocate all the needed memory */
before = kzalloc(sizeof(*before), GFP_KERNEL);
@@ -2409,7 +2061,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
after->start = eaddr + 1;
after->last = tmp->last;
after->offset = tmp->offset;
- after->offset += after->start - tmp->start;
+ after->offset += (after->start - tmp->start) << PAGE_SHIFT;
after->flags = tmp->flags;
after->bo_va = tmp->bo_va;
list_add(&after->list, &tmp->bo_va->invalids);
@@ -2438,18 +2090,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
/* Insert partial mapping before the range */
if (!list_empty(&before->list)) {
+ struct amdgpu_bo *bo = before->bo_va->base.bo;
+
amdgpu_vm_it_insert(before, &vm->va);
- if (before->flags & AMDGPU_PTE_PRT)
+ if (before->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
+
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
+ !before->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&before->bo_va->base);
} else {
kfree(before);
}
/* Insert partial mapping after the range */
if (!list_empty(&after->list)) {
+ struct amdgpu_bo *bo = after->bo_va->base.bo;
+
amdgpu_vm_it_insert(after, &vm->va);
- if (after->flags & AMDGPU_PTE_PRT)
+ if (after->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
+
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
+ !after->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&after->bo_va->base);
} else {
kfree(after);
}
@@ -2506,7 +2170,7 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
}
/**
- * amdgpu_vm_bo_rmv - remove a bo to a specific vm
+ * amdgpu_vm_bo_del - remove a bo from a specific vm
*
* @adev: amdgpu_device pointer
* @bo_va: requested bo_va
@@ -2515,7 +2179,7 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
*
* Object have to be reserved!
*/
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+void amdgpu_vm_bo_del(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va)
{
struct amdgpu_bo_va_mapping *mapping, *next;
@@ -2523,23 +2187,27 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_vm_bo_base **base;
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+
if (bo) {
- if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
- vm->bulk_moveable = false;
+ dma_resv_assert_held(bo->tbo.base.resv);
+ if (amdgpu_vm_is_bo_always_valid(vm, bo))
+ ttm_bo_set_bulk_move(&bo->tbo, NULL);
for (base = &bo_va->base.bo->vm_bo; *base;
base = &(*base)->next) {
if (*base != &bo_va->base)
continue;
+ amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
*base = bo_va->base.next;
break;
}
}
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2579,7 +2247,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
return true;
/* Don't evict VM page tables while they are busy */
- if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
+ if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
return false;
/* Try to block ongoing updates */
@@ -2600,25 +2268,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
/**
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
- * @adev: amdgpu_device pointer
* @bo: amdgpu buffer object
* @evicted: is the BO evicted
*
* Mark @bo as invalid.
*/
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted)
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
{
struct amdgpu_vm_bo_base *bo_base;
- /* shadow bo doesn't have bo base, its validation needs its parent */
- if (bo->parent && bo->parent->shadow == bo)
- bo = bo->parent;
-
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
+ if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) {
amdgpu_vm_bo_evicted(bo_base);
continue;
}
@@ -2629,7 +2291,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
if (bo->tbo.type == ttm_bo_type_kernel)
amdgpu_vm_bo_relocated(bo_base);
- else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
+ else if (amdgpu_vm_is_bo_always_valid(vm, bo))
amdgpu_vm_bo_moved(bo_base);
else
amdgpu_vm_bo_invalidated(bo_base);
@@ -2637,6 +2299,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_bo_move - handle BO move
+ *
+ * @bo: amdgpu buffer object
+ * @new_mem: the new placement of the BO move
+ * @evicted: is the BO evicted
+ *
+ * Update the memory stats for the new placement and mark @bo as invalid.
+ */
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
+ amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
+ spin_unlock(&vm->status_lock);
+ }
+
+ amdgpu_vm_bo_invalidate(bo, evicted);
+}
+
+/**
* amdgpu_vm_get_block_size - calculate VM page table size as power of two
*
* @vm_size: VM size
@@ -2706,7 +2394,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
(1 << 30) - 1) >> 30;
vm_size = roundup_pow_of_two(
- min(max(phys_ram_gb * 3, min_vm_size), max_size));
+ clamp(phys_ram_gb * 3, min_vm_size, max_size));
}
adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
@@ -2715,7 +2403,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
if (amdgpu_vm_block_size != -1)
tmp >>= amdgpu_vm_block_size - 9;
tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
- adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
+ adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
switch (adev->vm_manager.num_level) {
case 3:
adev->vm_manager.root_level = AMDGPU_VM_PDB2;
@@ -2745,10 +2433,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
else
adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
- DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
- vm_size, adev->vm_manager.num_level + 1,
- adev->vm_manager.block_size,
- adev->vm_manager.fragment_size);
+ dev_info(
+ adev->dev,
+ "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
+ vm_size, adev->vm_manager.num_level + 1,
+ adev->vm_manager.block_size, adev->vm_manager.fragment_size);
}
/**
@@ -2759,12 +2448,114 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
*/
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
{
- timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
- true, true, timeout);
+ timeout = drm_sched_entity_flush(&vm->immediate, timeout);
if (timeout <= 0)
return timeout;
- return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
+ return drm_sched_entity_flush(&vm->delayed, timeout);
+}
+
+static void amdgpu_vm_destroy_task_info(struct kref *kref)
+{
+ struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount);
+
+ kfree(ti);
+}
+
+static inline struct amdgpu_vm *
+amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+ return vm;
+}
+
+/**
+ * amdgpu_vm_put_task_info - reference down the vm task_info ptr
+ *
+ * @task_info: task_info struct under discussion.
+ *
+ * frees the vm task_info ptr at the last put
+ */
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
+{
+ if (task_info)
+ kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
+}
+
+/**
+ * amdgpu_vm_get_task_info_vm - Extracts task info for a vm.
+ *
+ * @vm: VM to get info from
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
+{
+ struct amdgpu_task_info *ti = NULL;
+
+ if (vm) {
+ ti = vm->task_info;
+ kref_get(&vm->task_info->refcount);
+ }
+
+ return ti;
+}
+
+/**
+ * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID.
+ *
+ * @adev: drm device pointer
+ * @pasid: PASID identifier for VM
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+ return amdgpu_vm_get_task_info_vm(
+ amdgpu_vm_get_vm_from_pasid(adev, pasid));
+}
+
+static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
+{
+ vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL);
+ if (!vm->task_info)
+ return -ENOMEM;
+
+ kref_init(&vm->task_info->refcount);
+ return 0;
+}
+
+/**
+ * amdgpu_vm_set_task_info - Sets VMs task info.
+ *
+ * @vm: vm for which to set the info
+ */
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+{
+ if (!vm->task_info)
+ return;
+
+ if (vm->task_info->task.pid == current->pid)
+ return;
+
+ vm->task_info->task.pid = current->pid;
+ get_task_comm(vm->task_info->task.comm, current);
+
+ if (current->group_leader->mm != current->mm)
+ return;
+
+ vm->task_info->tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info->process_name, current->group_leader);
}
/**
@@ -2772,8 +2563,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
*
* @adev: amdgpu_device pointer
* @vm: requested vm
- * @vm_context: Indicates if it GFX or Compute context
- * @pasid: Process address space identifier
+ * @xcp_id: GPU partition selection id
+ * @pasid: the pasid the VM is using on this GPU
*
* Init @vm fields.
*
@@ -2781,52 +2572,39 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
* 0 for success, error for failure.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int vm_context, u32 pasid)
+ int32_t xcp_id, uint32_t pasid)
{
- struct amdgpu_bo_param bp;
- struct amdgpu_bo *root;
+ struct amdgpu_bo *root_bo;
+ struct amdgpu_bo_vm *root;
int r, i;
vm->va = RB_ROOT_CACHED;
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
INIT_LIST_HEAD(&vm->evicted);
+ INIT_LIST_HEAD(&vm->evicted_user);
INIT_LIST_HEAD(&vm->relocated);
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
INIT_LIST_HEAD(&vm->invalidated);
- spin_lock_init(&vm->invalidated_lock);
+ spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
+ INIT_KFIFO(vm->faults);
- /* create scheduler entities for page table updates */
- r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
- adev->vm_manager.vm_pte_scheds,
- adev->vm_manager.vm_pte_num_scheds, NULL);
+ r = amdgpu_vm_init_entities(adev, vm);
if (r)
return r;
- r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
- adev->vm_manager.vm_pte_scheds,
- adev->vm_manager.vm_pte_num_scheds, NULL);
- if (r)
- goto error_free_immediate;
+ ttm_lru_bulk_move_init(&vm->lru_bulk_move);
- vm->pte_support_ats = false;
vm->is_compute_context = false;
- if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
- vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
- AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+ AMDGPU_VM_USE_CPU_FOR_GFX);
- if (adev->asic_type == CHIP_RAVEN)
- vm->pte_support_ats = true;
- } else {
- vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
- AMDGPU_VM_USE_CPU_FOR_GFX);
- }
- DRM_DEBUG_DRIVER("VM update mode is %s\n",
- vm->use_cpu_for_update ? "CPU" : "SDMA");
+ dev_dbg(adev->dev, "VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
!amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
@@ -2835,107 +2613,79 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->update_funcs = &amdgpu_vm_cpu_funcs;
else
vm->update_funcs = &amdgpu_vm_sdma_funcs;
- vm->last_update = NULL;
+
+ vm->last_update = dma_fence_get_stub();
vm->last_unlocked = dma_fence_get_stub();
+ vm->last_tlb_flush = dma_fence_get_stub();
+ vm->generation = amdgpu_vm_generation(adev, NULL);
mutex_init(&vm->eviction_lock);
vm->evicting = false;
+ vm->tlb_fence_context = dma_fence_context_alloc(1);
- amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
- if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
- bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
- r = amdgpu_bo_create(adev, &bp, &root);
+ r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
+ false, &root, xcp_id);
if (r)
goto error_free_delayed;
- r = amdgpu_bo_reserve(root, true);
+ root_bo = amdgpu_bo_ref(&root->bo);
+ r = amdgpu_bo_reserve(root_bo, true);
+ if (r) {
+ amdgpu_bo_unref(&root_bo);
+ goto error_free_delayed;
+ }
+
+ amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
+ r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
if (r)
goto error_free_root;
- r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
+ r = amdgpu_vm_pt_clear(adev, vm, root, false);
if (r)
- goto error_unreserve;
-
- amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
+ goto error_free_root;
- r = amdgpu_vm_clear_bo(adev, vm, root, false);
+ r = amdgpu_vm_create_task_info(vm);
if (r)
- goto error_unreserve;
-
- amdgpu_bo_unreserve(vm->root.base.bo);
-
- if (pasid) {
- unsigned long flags;
+ dev_dbg(adev->dev, "Failed to create task info for VM\n");
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
- r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+ /* Store new PASID in XArray (if non-zero) */
+ if (pasid != 0) {
+ r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
if (r < 0)
goto error_free_root;
vm->pasid = pasid;
}
- INIT_KFIFO(vm->faults);
+ amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_bo_unref(&root_bo);
return 0;
-error_unreserve:
- amdgpu_bo_unreserve(vm->root.base.bo);
-
error_free_root:
- amdgpu_bo_unref(&vm->root.base.bo->shadow);
- amdgpu_bo_unref(&vm->root.base.bo);
- vm->root.base.bo = NULL;
+ /* If PASID was partially set, erase it from XArray before failing */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
+ amdgpu_vm_pt_free_root(adev, vm);
+ amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_bo_unref(&root_bo);
error_free_delayed:
+ dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
- drm_sched_entity_destroy(&vm->delayed);
-
-error_free_immediate:
- drm_sched_entity_destroy(&vm->immediate);
+ ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+ amdgpu_vm_fini_entities(vm);
return r;
}
/**
- * amdgpu_vm_check_clean_reserved - check if a VM is clean
- *
- * @adev: amdgpu_device pointer
- * @vm: the VM to check
- *
- * check all entries of the root PD, if any subsequent PDs are allocated,
- * it means there are page table creating and filling, and is no a clean
- * VM
- *
- * Returns:
- * 0 if this VM is clean
- */
-static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
-{
- enum amdgpu_vm_level root = adev->vm_manager.root_level;
- unsigned int entries = amdgpu_vm_num_entries(adev, root);
- unsigned int i = 0;
-
- if (!(vm->root.entries))
- return 0;
-
- for (i = 0; i < entries; i++) {
- if (vm->root.entries[i].base.bo)
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
* amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
*
* @adev: amdgpu_device pointer
* @vm: requested vm
- * @pasid: pasid to use
*
* This only works on GFX VMs that don't have any BOs added and no
* page tables allocated yet.
@@ -2943,7 +2693,6 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
* Changes the following VM parameters:
* - use_cpu_for_update
* - pte_supports_ats
- * - pasid (old PASID is released, because compute manages its own PASIDs)
*
* Reinitializes the page directory to reflect the changed ATS
* setting.
@@ -2951,122 +2700,56 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
* Returns:
* 0 for success, -errno for errors.
*/
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid)
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
- r = amdgpu_bo_reserve(vm->root.base.bo, true);
+ r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
return r;
- /* Sanity checks */
- r = amdgpu_vm_check_clean_reserved(adev, vm);
- if (r)
- goto unreserve_bo;
-
- if (pasid) {
- unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
- r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-
- if (r == -ENOSPC)
- goto unreserve_bo;
- r = 0;
- }
-
- /* Check if PD needs to be reinitialized and do it before
- * changing any other state, in case it fails.
- */
- if (pte_support_ats != vm->pte_support_ats) {
- vm->pte_support_ats = pte_support_ats;
- r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false);
- if (r)
- goto free_idr;
- }
-
/* Update VM state */
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
- DRM_DEBUG_DRIVER("VM update mode is %s\n",
- vm->use_cpu_for_update ? "CPU" : "SDMA");
+ dev_dbg(adev->dev, "VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
!amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->use_cpu_for_update) {
/* Sync with last SDMA update/clear before switching to CPU */
- r = amdgpu_bo_sync_wait(vm->root.base.bo,
+ r = amdgpu_bo_sync_wait(vm->root.bo,
AMDGPU_FENCE_OWNER_UNDEFINED, true);
if (r)
- goto free_idr;
+ goto unreserve_bo;
vm->update_funcs = &amdgpu_vm_cpu_funcs;
+ r = amdgpu_vm_pt_map_tables(adev, vm);
+ if (r)
+ goto unreserve_bo;
+
} else {
vm->update_funcs = &amdgpu_vm_sdma_funcs;
}
+
dma_fence_put(vm->last_update);
- vm->last_update = NULL;
+ vm->last_update = dma_fence_get_stub();
vm->is_compute_context = true;
- if (vm->pasid) {
- unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
- idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-
- /* Free the original amdgpu allocated pasid
- * Will be replaced with kfd allocated pasid
- */
- amdgpu_pasid_free(vm->pasid);
- vm->pasid = 0;
- }
-
- /* Free the shadow bo for compute VM */
- amdgpu_bo_unref(&vm->root.base.bo->shadow);
-
- if (pasid)
- vm->pasid = pasid;
-
- goto unreserve_bo;
-
-free_idr:
- if (pasid) {
- unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
- idr_remove(&adev->vm_manager.pasid_idr, pasid);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
- }
unreserve_bo:
- amdgpu_bo_unreserve(vm->root.base.bo);
+ amdgpu_bo_unreserve(vm->root.bo);
return r;
}
-/**
- * amdgpu_vm_release_compute - release a compute vm
- * @adev: amdgpu_device pointer
- * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
- *
- * This is a correspondant of amdgpu_vm_make_compute. It decouples compute
- * pasid from vm. Compute should stop use of vm after this call.
- */
-void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
{
- if (vm->pasid) {
- unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
- idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+ for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
+ if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
+ vm->stats[i].evicted == 0))
+ return false;
}
- vm->pasid = 0;
- vm->is_compute_context = false;
+ return true;
}
/**
@@ -3083,26 +2766,28 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
struct amdgpu_bo *root;
+ unsigned long flags;
int i;
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
- root = amdgpu_bo_ref(vm->root.base.bo);
+ root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
- if (vm->pasid) {
- unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
- idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+ /* Remove PASID mapping before destroying VM */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
vm->pasid = 0;
}
-
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
+ dma_fence_wait(vm->last_tlb_flush, false);
+ /* Make sure that all fence callbacks have completed */
+ spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
+ spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
+ dma_fence_put(vm->last_tlb_flush);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
- if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
amdgpu_vm_prt_fini(adev, vm);
prt_fini_needed = false;
}
@@ -3111,13 +2796,12 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
}
- amdgpu_vm_free_pts(adev, vm, NULL);
+ amdgpu_vm_pt_free_root(adev, vm);
amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
- WARN_ON(vm->root.base.bo);
+ WARN_ON(vm->root.bo);
- drm_sched_entity_destroy(&vm->immediate);
- drm_sched_entity_destroy(&vm->delayed);
+ amdgpu_vm_fini_entities(vm);
if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
dev_err(adev->dev, "still active bo inside vm\n");
@@ -3132,8 +2816,22 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
dma_fence_put(vm->last_update);
- for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
amdgpu_vmid_free_reserved(adev, vm, i);
+ }
+
+ ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+
+ if (!amdgpu_vm_stats_is_zero(vm)) {
+ struct amdgpu_task_info *ti = vm->task_info;
+
+ dev_warn(adev->dev,
+ "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
+ ti->process_name, ti->task.pid, ti->task.comm, ti->tgid);
+ }
+
+ amdgpu_vm_put_task_info(vm->task_info);
}
/**
@@ -3145,15 +2843,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
*/
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
- unsigned i;
-
+ /* Concurrent flushes are only possible starting with Vega10 and
+ * are broken on Navi10 and Navi14.
+ */
+ adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
+ adev->asic_type == CHIP_NAVI10 ||
+ adev->asic_type == CHIP_NAVI14);
amdgpu_vmid_mgr_init(adev);
- adev->vm_manager.fence_context =
- dma_fence_context_alloc(AMDGPU_MAX_RINGS);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- adev->vm_manager.seqno[i] = 0;
-
spin_lock_init(&adev->vm_manager.prt_lock);
atomic_set(&adev->vm_manager.num_prt_users, 0);
@@ -3162,7 +2859,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
#ifdef CONFIG_X86_64
if (amdgpu_vm_update_mode == -1) {
- if (amdgpu_gmc_vram_full_visible(&adev->gmc))
+ /* For asic with VF MMIO access protection
+ * avoid using CPU for VM table updates
+ */
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ !amdgpu_sriov_vf_mmio_access_protection(adev))
adev->vm_manager.vm_update_mode =
AMDGPU_VM_USE_CPU_FOR_COMPUTE;
else
@@ -3173,8 +2874,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
adev->vm_manager.vm_update_mode = 0;
#endif
- idr_init(&adev->vm_manager.pasid_idr);
- spin_lock_init(&adev->vm_manager.pasid_lock);
+ xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
}
/**
@@ -3186,8 +2886,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
- WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
- idr_destroy(&adev->vm_manager.pasid_idr);
+ WARN_ON(!xa_empty(&adev->vm_manager.pasids));
+ xa_destroy(&adev->vm_manager.pasids);
amdgpu_vmid_mgr_fini(adev);
}
@@ -3207,34 +2907,18 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
- long timeout = msecs_to_jiffies(2000);
- int r;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ /* No valid flags defined yet */
+ if (args->in.flags)
+ return -EINVAL;
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* We only have requirement to reserve vmid from gfxhub */
- r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
- AMDGPU_GFXHUB_0);
- if (r)
- return r;
- break;
+ return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
case AMDGPU_VM_OP_UNRESERVE_VMID:
- if (amdgpu_sriov_runtime(adev))
- timeout = 8 * timeout;
-
- /* Wait vm idle to make sure the vmid set in SPM_VMID is
- * not referenced anymore.
- */
- r = amdgpu_bo_reserve(fpriv->vm.root.base.bo, true);
- if (r)
- return r;
-
- r = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
- if (r < 0)
- return r;
-
- amdgpu_bo_unreserve(fpriv->vm.root.base.bo);
- amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
+ amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
default:
return -EINVAL;
@@ -3244,100 +2928,73 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
/**
- * amdgpu_vm_get_task_info - Extracts task info for a PASID.
- *
- * @adev: drm device pointer
- * @pasid: PASID identifier for VM
- * @task_info: task_info to fill.
- */
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
- struct amdgpu_task_info *task_info)
-{
- struct amdgpu_vm *vm;
- unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-
- vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
- if (vm)
- *task_info = vm->task_info;
-
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-}
-
-/**
- * amdgpu_vm_set_task_info - Sets VMs task info.
- *
- * @vm: vm for which to set the info
- */
-void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
-{
- if (vm->task_info.pid)
- return;
-
- vm->task_info.pid = current->pid;
- get_task_comm(vm->task_info.task_name, current);
-
- if (current->group_leader->mm != current->mm)
- return;
-
- vm->task_info.tgid = current->group_leader->pid;
- get_task_comm(vm->task_info.process_name, current->group_leader);
-}
-
-/**
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
* @pasid: PASID of the VM
+ * @ts: Timestamp of the fault
+ * @vmid: VMID, only used for GFX 9.4.3.
+ * @node_id: Node_id received in IH cookie. Only applicable for
+ * GFX 9.4.3.
* @addr: Address of the fault
+ * @write_fault: true is write fault, false is read fault
*
* Try to gracefully handle a VM fault. Return true if the fault was handled and
* shouldn't be reported any more.
*/
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- uint64_t addr)
+ u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
+ bool write_fault)
{
+ bool is_compute_context = false;
struct amdgpu_bo *root;
+ unsigned long irqflags;
uint64_t value, flags;
struct amdgpu_vm *vm;
- long r;
+ int r;
- spin_lock(&adev->vm_manager.pasid_lock);
- vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
- if (vm)
- root = amdgpu_bo_ref(vm->root.base.bo);
- else
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm) {
+ root = amdgpu_bo_ref(vm->root.bo);
+ is_compute_context = vm->is_compute_context;
+ } else {
root = NULL;
- spin_unlock(&adev->vm_manager.pasid_lock);
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
if (!root)
return false;
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+
+ if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
+ node_id, addr, ts, write_fault)) {
+ amdgpu_bo_unref(&root);
+ return true;
+ }
+
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
/* Double check that the VM still exists */
- spin_lock(&adev->vm_manager.pasid_lock);
- vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
- if (vm && vm->root.base.bo != root)
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm && vm->root.bo != root)
vm = NULL;
- spin_unlock(&adev->vm_manager.pasid_lock);
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
if (!vm)
goto error_unlock;
- addr /= AMDGPU_GPU_PAGE_SIZE;
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
- if (vm->is_compute_context) {
+ if (is_compute_context) {
/* Intentionally setting invalid PTE flag
* combination to force a no-retry-fault
*/
- flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
- AMDGPU_PTE_TF;
+ flags = AMDGPU_VM_NORETRY_FLAGS;
value = 0;
-
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
value = adev->dummy_page_addr;
@@ -3349,9 +3006,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
value = 0;
}
- r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
- addr, flags, value, NULL, NULL,
- NULL);
+ r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
+ if (r) {
+ pr_debug("failed %d to reserve fence slot\n", r);
+ goto error_unlock;
+ }
+
+ r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
+ NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
if (r)
goto error_unlock;
@@ -3360,7 +3022,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
error_unlock:
amdgpu_bo_unreserve(root);
if (r < 0)
- DRM_ERROR("Can't handle page fault (%ld)\n", r);
+ dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
error_unref:
amdgpu_bo_unref(&root);
@@ -3394,6 +3056,9 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
unsigned int total_done_objs = 0;
unsigned int id = 0;
+ amdgpu_vm_assert_locked(vm);
+
+ spin_lock(&vm->status_lock);
seq_puts(m, "\tIdle BOs:\n");
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
if (!bo_va->base.bo)
@@ -3431,7 +3096,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
id = 0;
seq_puts(m, "\tInvalidated BOs:\n");
- spin_lock(&vm->invalidated_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
@@ -3446,7 +3110,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
continue;
total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
total_done_objs = id;
seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
@@ -3463,3 +3127,83 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
total_done_objs);
}
#endif
+
+/**
+ * amdgpu_vm_update_fault_cache - update cached fault into.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ * @status: GPUVM fault status register
+ * @vmhub: which vmhub got the fault
+ *
+ * Cache the fault info for later use by userspace in debugging.
+ */
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ /* Don't update the fault cache if status is 0. In the multiple
+ * fault case, subsequent faults will return a 0 status which is
+ * useless for userspace and replaces the useful fault status, so
+ * only update if status is non-0.
+ */
+ if (vm && status) {
+ vm->fault_info.addr = addr;
+ vm->fault_info.status = status;
+ /*
+ * Update the fault information globally for later usage
+ * when vm could be stale or freed.
+ */
+ adev->vm_manager.fault_info.addr = addr;
+ adev->vm_manager.fault_info.vmhub = vmhub;
+ adev->vm_manager.fault_info.status = status;
+
+ if (AMDGPU_IS_GFXHUB(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else if (AMDGPU_IS_MMHUB0(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else if (AMDGPU_IS_MMHUB1(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else {
+ WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
+ }
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
+/**
+ * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
+ *
+ * @vm: VM to test against.
+ * @bo: BO to be tested.
+ *
+ * Returns true if the BO shares the dma_resv object with the root PD and is
+ * always guaranteed to be valid inside the VM.
+ */
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
+{
+ return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
+}
+
+void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+ struct amdgpu_task_info *task_info)
+{
+ dev_err(adev->dev,
+ " Process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 976a12e5a8b9..15d757c016cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -29,16 +29,20 @@
#include <linux/rbtree.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
-#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_bo.h>
#include <linux/sched/mm.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"
+#include "amdgpu_ttm.h"
+
+struct drm_exec;
struct amdgpu_bo_va;
struct amdgpu_job;
struct amdgpu_bo_list_entry;
+struct amdgpu_bo_vm;
/*
* GPUVM handling
@@ -82,10 +86,19 @@ struct amdgpu_bo_list_entry;
/* PDE Block Fragment Size for VEGA10 */
#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
+/* Flag combination to set no-retry with TF disabled */
+#define AMDGPU_VM_NORETRY_FLAGS (AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | \
+ AMDGPU_PTE_TF)
+/* Flag combination to set no-retry with TF enabled */
+#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
+ AMDGPU_PTE_PRT)
/* For GFX9 */
-#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57)
-#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype) ((uint64_t)(mtype) << 57)
+#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10_SHIFT(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_VG10_MASK)) | \
+ AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype))
#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2
@@ -98,31 +111,74 @@ struct amdgpu_bo_list_entry;
| AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
/* gfx10 */
-#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48)
-#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL)
+#define AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype) ((uint64_t)(mtype) << 48)
+#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10_SHIFT(7ULL)
+#define AMDGPU_PTE_MTYPE_NV10(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_NV10_MASK)) | \
+ AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype))
+
+/* gfx12 */
+#define AMDGPU_PTE_PRT_GFX12 (1ULL << 56)
+#define AMDGPU_PTE_PRT_FLAG(adev) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PTE_PRT_GFX12 : AMDGPU_PTE_PRT)
+
+#define AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype) ((uint64_t)(mtype) << 54)
+#define AMDGPU_PTE_MTYPE_GFX12_MASK AMDGPU_PTE_MTYPE_GFX12_SHIFT(3ULL)
+#define AMDGPU_PTE_MTYPE_GFX12(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_GFX12_MASK)) | \
+ AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype))
+
+#define AMDGPU_PTE_DCC (1ULL << 58)
+#define AMDGPU_PTE_IS_PTE (1ULL << 63)
+
+/* PDE Block Fragment Size for gfx v12 */
+#define AMDGPU_PDE_BFS_GFX12(a) ((uint64_t)((a) & 0x1fULL) << 58)
+#define AMDGPU_PDE_BFS_FLAG(adev, a) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_BFS_GFX12(a) : AMDGPU_PDE_BFS(a))
+/* PDE is handled as PTE for gfx v12 */
+#define AMDGPU_PDE_PTE_GFX12 (1ULL << 63)
+#define AMDGPU_PDE_PTE_FLAG(adev) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_PTE_GFX12 : AMDGPU_PDE_PTE)
/* How to program VM fault handling */
#define AMDGPU_VM_FAULT_STOP_NEVER 0
#define AMDGPU_VM_FAULT_STOP_FIRST 1
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
-/* Reserve 4MB VRAM for page tables */
+/* How much VRAM be reserved for page tables */
#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)
-/* max number of VMHUB */
-#define AMDGPU_MAX_VMHUBS 3
-#define AMDGPU_GFXHUB_0 0
-#define AMDGPU_MMHUB_0 1
-#define AMDGPU_MMHUB_1 2
-
-/* Reserve 2MB at top/bottom of address space for kernel use */
-#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
-
-/* max vmids dedicated for process */
-#define AMDGPU_VM_MAX_RESERVED_VMID 1
-
-#define AMDGPU_VM_CONTEXT_GFX 0
-#define AMDGPU_VM_CONTEXT_COMPUTE 1
+/*
+ * max number of VMHUB
+ * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
+ */
+#define AMDGPU_MAX_VMHUBS 13
+#define AMDGPU_GFXHUB_START 0
+#define AMDGPU_MMHUB0_START 8
+#define AMDGPU_MMHUB1_START 12
+#define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x))
+#define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x))
+#define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x))
+
+#define AMDGPU_IS_GFXHUB(x) ((x) >= AMDGPU_GFXHUB_START && (x) < AMDGPU_MMHUB0_START)
+#define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START)
+#define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS)
+
+/* Reserve space at top/bottom of address space for kernel use */
+#define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20)
+#define AMDGPU_VA_RESERVED_CSA_START(adev) (((adev)->vm_manager.max_pfn \
+ << AMDGPU_GPU_PAGE_SHIFT) \
+ - AMDGPU_VA_RESERVED_CSA_SIZE)
+#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
+#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
+ - AMDGPU_VA_RESERVED_SEQ64_SIZE)
+#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
+#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
+ - AMDGPU_VA_RESERVED_TRAP_SIZE)
+#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
+#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \
+ AMDGPU_VA_RESERVED_SEQ64_SIZE + \
+ AMDGPU_VA_RESERVED_CSA_SIZE)
/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
@@ -147,20 +203,17 @@ struct amdgpu_vm_bo_base {
/* protected by bo being reserved */
struct amdgpu_vm_bo_base *next;
- /* protected by spinlock */
+ /* protected by vm status_lock */
struct list_head vm_status;
+ /* if the bo is counted as shared in mem stats
+ * protected by vm status_lock */
+ bool shared;
+
/* protected by the BO being reserved */
bool moved;
};
-struct amdgpu_vm_pt {
- struct amdgpu_vm_bo_base base;
-
- /* array of page tables, one for each directory entry */
- struct amdgpu_vm_pt *entries;
-};
-
/* provided by hw blocks that can write ptes, e.g., sdma */
struct amdgpu_vm_pte_funcs {
/* number of dw to reserve per operation */
@@ -183,10 +236,10 @@ struct amdgpu_vm_pte_funcs {
};
struct amdgpu_task_info {
- char process_name[TASK_COMM_LEN];
- char task_name[TASK_COMM_LEN];
- pid_t pid;
- pid_t tgid;
+ struct drm_wedge_task_info task;
+ char process_name[TASK_COMM_LEN];
+ pid_t tgid;
+ struct kref refcount;
};
/**
@@ -234,19 +287,51 @@ struct amdgpu_vm_update_params {
* @num_dw_left: number of dw left for the IB
*/
unsigned int num_dw_left;
+
+ /**
+ * @needs_flush: true whenever we need to invalidate the TLB
+ */
+ bool needs_flush;
+
+ /**
+ * @allow_override: true for memory that is not uncached: allows MTYPE
+ * to be overridden for NUMA local memory.
+ */
+ bool allow_override;
+
+ /**
+ * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
+ */
+ struct list_head tlb_flush_waitlist;
};
struct amdgpu_vm_update_funcs {
- int (*map_table)(struct amdgpu_bo *bo);
- int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode);
+ int (*map_table)(struct amdgpu_bo_vm *bo);
+ int (*prepare)(struct amdgpu_vm_update_params *p,
+ struct amdgpu_sync *sync, u64 k_job_id);
int (*update)(struct amdgpu_vm_update_params *p,
- struct amdgpu_bo *bo, uint64_t pe, uint64_t addr,
+ struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr, uint64_t flags);
int (*commit)(struct amdgpu_vm_update_params *p,
struct dma_fence **fence);
};
+struct amdgpu_vm_fault_info {
+ /* fault address */
+ uint64_t addr;
+ /* fault status register */
+ uint32_t status;
+ /* which vmhub? gfxhub, mmhub, etc. */
+ unsigned int vmhub;
+};
+
+struct amdgpu_mem_stats {
+ struct drm_memory_stats drm;
+
+ /* buffers that requested this placement but are currently evicted */
+ uint64_t evicted;
+};
+
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@@ -258,7 +343,20 @@ struct amdgpu_vm {
bool evicting;
unsigned int saved_flags;
- /* BOs who needs a validation */
+ /* Lock to protect vm_bo add/del/move on all lists of vm */
+ spinlock_t status_lock;
+
+ /* Memory statistics for this vm, protected by status_lock */
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for either
+ * PDs, PTs or per VM BOs. The state transits are:
+ *
+ * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle
+ */
+
+ /* Per-VM and PT BOs who needs a validation */
struct list_head evicted;
/* PT BOs which relocated and their parent need an update */
@@ -270,29 +368,50 @@ struct amdgpu_vm {
/* All BOs of this VM not currently in the state machine */
struct list_head idle;
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for BOs which
+ * have their own dma_resv object and not depend on the root PD. Their
+ * state transits are:
+ *
+ * evicted_user or invalidated -> done
+ */
+
+ /* BOs for user mode queues that need a validation */
+ struct list_head evicted_user;
+
/* regular invalidated BOs, but not yet updated in the PT */
struct list_head invalidated;
- spinlock_t invalidated_lock;
-
- /* BO mappings freed, but not yet updated in the PT */
- struct list_head freed;
/* BOs which are invalidated, has been updated in the PTs */
struct list_head done;
+ /*
+ * This list contains amdgpu_bo_va_mapping objects which have been freed
+ * but not updated in the PTs
+ */
+ struct list_head freed;
+
/* contains the page directory */
- struct amdgpu_vm_pt root;
+ struct amdgpu_vm_bo_base root;
struct dma_fence *last_update;
/* Scheduler entities for page table updates */
struct drm_sched_entity immediate;
struct drm_sched_entity delayed;
+ /* Last finished delayed update */
+ atomic64_t tlb_seq;
+ struct dma_fence *last_tlb_flush;
+ atomic64_t kfd_last_flushed_seq;
+ uint64_t tlb_fence_context;
+
+ /* How many times we had to re-generate the page tables */
+ uint64_t generation;
+
/* Last unlocked submission to the scheduler entities */
struct dma_fence *last_unlocked;
unsigned int pasid;
- /* dedicated to vm */
struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS];
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
@@ -301,9 +420,6 @@ struct amdgpu_vm {
/* Functions to use for VM table updates */
const struct amdgpu_vm_update_funcs *update_funcs;
- /* Flag to indicate ATS support from PTE for GFX9 */
- bool pte_support_ats;
-
/* Up to 128 pending retry page faults */
DECLARE_KFIFO(faults, u64, 128);
@@ -317,24 +433,25 @@ struct amdgpu_vm {
uint64_t pd_phys_addr;
/* Some basic info about the task */
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
/* Store positions of group of BOs */
struct ttm_lru_bulk_move lru_bulk_move;
- /* mark whether can do the bulk move */
- bool bulk_moveable;
/* Flag to indicate if VM is used for compute */
bool is_compute_context;
+
+ /* Memory partition number, -1 means any partition */
+ int8_t mem_id;
+
+ /* cached fault info */
+ struct amdgpu_vm_fault_info fault_info;
};
struct amdgpu_vm_manager {
/* Handling of VMIDs */
struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS];
unsigned int first_kfd_vmid;
-
- /* Handling of VM fences */
- u64 fence_context;
- unsigned seqno[AMDGPU_MAX_RINGS];
+ bool concurrent_flush;
uint64_t max_pfn;
uint32_t num_level;
@@ -362,10 +479,13 @@ struct amdgpu_vm_manager {
/* PASID to VM mapping, will be used in interrupt context to
* look up VM of a page fault
*/
- struct idr pasid_idr;
- spinlock_t pasid_lock;
+ struct xarray pasids;
+ /* Global registration of recent page fault information */
+ struct amdgpu_vm_fault_info fault_info;
};
+struct amdgpu_bo_va_mapping;
+
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -377,18 +497,19 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int vm_context, u32 pasid);
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
-void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- struct list_head *validated,
- struct amdgpu_bo_list_entry *entry);
+int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
bool amdgpu_vm_ready(struct amdgpu_vm *vm);
-int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int (*callback)(void *p, struct amdgpu_bo *bo),
- void *param);
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket,
+ int (*callback)(void *p, struct amdgpu_bo *bo),
+ void *param);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate);
@@ -396,13 +517,31 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask);
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *new_res, int sign);
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
@@ -412,11 +551,11 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
- uint64_t size, uint64_t flags);
+ uint64_t size, uint32_t flags);
int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
- uint64_t size, uint64_t flags);
+ uint64_t size, uint32_t flags);
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr);
@@ -426,7 +565,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
uint64_t addr);
void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+void amdgpu_vm_bo_del(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
uint32_t fragment_size_default, unsigned max_level,
@@ -436,19 +575,117 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
- struct amdgpu_task_info *task_info);
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
+
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
+
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info);
+
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- uint64_t addr);
+ u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
+ bool write_fault);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]);
+
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate);
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id);
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry);
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags);
+void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
#endif
+int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+
+/**
+ * amdgpu_vm_tlb_seq - return tlb flush sequence number
+ * @vm: the amdgpu_vm structure to query
+ *
+ * Returns the tlb flush sequence number which indicates that the VM TLBs needs
+ * to be invalidated whenever the sequence number change.
+ */
+static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
+{
+ unsigned long flags;
+ spinlock_t *lock;
+
+ /*
+ * Workaround to stop racing between the fence signaling and handling
+ * the cb. The lock is static after initially setting it up, just make
+ * sure that the dma_fence structure isn't freed up.
+ */
+ rcu_read_lock();
+ lock = vm->last_tlb_flush->lock;
+ rcu_read_unlock();
+
+ spin_lock_irqsave(lock, flags);
+ spin_unlock_irqrestore(lock, flags);
+
+ return atomic64_read(&vm->tlb_seq);
+}
+
+/*
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+ mutex_lock(&vm->eviction_lock);
+ vm->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline bool amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+ if (mutex_trylock(&vm->eviction_lock)) {
+ vm->saved_flags = memalloc_noreclaim_save();
+ return true;
+ }
+ return false;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+ memalloc_noreclaim_restore(vm->saved_flags);
+ mutex_unlock(&vm->eviction_lock);
+}
+
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
+
+void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+ struct amdgpu_task_info *task_info);
+
+#define amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &(bo_va)->valids, list)
+#define amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &(bo_va)->invalids, list)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index ac45d9c7a4e9..22e2e5b47341 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -29,36 +29,37 @@
*
* @table: newly allocated or validated PD/PT
*/
-static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table)
+static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
{
- return amdgpu_bo_kmap(table, NULL);
+ table->bo.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ return amdgpu_bo_kmap(&table->bo, NULL);
}
/**
* amdgpu_vm_cpu_prepare - prepare page table update with the CPU
*
* @p: see amdgpu_vm_update_params definition
- * @resv: reservation object with embedded fence
- * @sync_mode: synchronization mode
+ * @sync: sync obj with fences to wait on
+ * @k_job_id: the id for tracing/debug purposes
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
- struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode)
+ struct amdgpu_sync *sync,
+ u64 k_job_id)
{
- if (!resv)
+ if (!sync)
return 0;
- return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true);
+ return amdgpu_sync_wait(sync, true);
}
/**
* amdgpu_vm_cpu_update - helper to update page tables via CPU
*
* @p: see amdgpu_vm_update_params definition
- * @bo: PD/PT to update
+ * @vmbo: PD/PT to update
* @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
* @addr: dst addr to write into pe
* @count: number of page entries to update
@@ -68,21 +69,20 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
* Write count number of PT/PD entries directly.
*/
static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
- struct amdgpu_bo *bo, uint64_t pe,
+ struct amdgpu_bo_vm *vmbo, uint64_t pe,
uint64_t addr, unsigned count, uint32_t incr,
uint64_t flags)
{
unsigned int i;
uint64_t value;
- int r;
+ long r;
- if (bo->tbo.moving) {
- r = dma_fence_wait(bo->tbo.moving, true);
- if (r)
- return r;
- }
+ r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ true, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
- pe += (unsigned long)amdgpu_bo_kptr(bo);
+ pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
@@ -108,9 +108,11 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
struct dma_fence **fence)
{
- /* Flush HDP */
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
mb();
- amdgpu_asic_flush_hdp(p->adev, NULL);
+ amdgpu_device_flush_hdp(p->adev, NULL);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
new file mode 100644
index 000000000000..f794fb1cc06e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -0,0 +1,976 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_job.h"
+
+/*
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
+ */
+struct amdgpu_vm_pt_cursor {
+ uint64_t pfn;
+ struct amdgpu_vm_bo_base *parent;
+ struct amdgpu_vm_bo_base *entry;
+ unsigned int level;
+};
+
+/**
+ * amdgpu_vm_pt_level_shift - return the addr shift for each level
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The number of bits the pfn needs to be right shifted for a level.
+ */
+static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ switch (level) {
+ case AMDGPU_VM_PDB2:
+ case AMDGPU_VM_PDB1:
+ case AMDGPU_VM_PDB0:
+ return 9 * (AMDGPU_VM_PDB0 - level) +
+ adev->vm_manager.block_size;
+ case AMDGPU_VM_PTB:
+ return 0;
+ default:
+ return ~0;
+ }
+}
+
+/**
+ * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The number of entries in a page directory or page table.
+ */
+static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ unsigned int shift;
+
+ shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
+ if (level == adev->vm_manager.root_level)
+ /* For the root directory */
+ return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
+ >> shift;
+ else if (level != AMDGPU_VM_PTB)
+ /* Everything in between */
+ return 512;
+
+ /* For the page tables on the leaves */
+ return AMDGPU_VM_PTE_COUNT(adev);
+}
+
+/**
+ * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The mask to extract the entry number of a PD/PT from an address.
+ */
+static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ if (level <= adev->vm_manager.root_level)
+ return 0xffffffff;
+ else if (level != AMDGPU_VM_PTB)
+ return 0x1ff;
+ else
+ return AMDGPU_VM_PTE_COUNT(adev) - 1;
+}
+
+/**
+ * amdgpu_vm_pt_size - returns the size of the page table in bytes
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The size of the BO for a page directory or page table in bytes.
+ */
+static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
+}
+
+/**
+ * amdgpu_vm_pt_parent - get the parent page directory
+ *
+ * @pt: child page table
+ *
+ * Helper to get the parent entry for the child page table. NULL if we are at
+ * the root page directory.
+ */
+static struct amdgpu_vm_bo_base *
+amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
+{
+ struct amdgpu_bo *parent = pt->bo->parent;
+
+ if (!parent)
+ return NULL;
+
+ return parent->vm_bo;
+}
+
+/**
+ * amdgpu_vm_pt_start - start PD/PT walk
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start address of the walk
+ * @cursor: state to initialize
+ *
+ * Initialize a amdgpu_vm_pt_cursor to start a walk.
+ */
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, uint64_t start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ cursor->pfn = start;
+ cursor->parent = NULL;
+ cursor->entry = &vm->root;
+ cursor->level = adev->vm_manager.root_level;
+}
+
+/**
+ * amdgpu_vm_pt_descendant - go to child node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the child node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ unsigned int mask, shift, idx;
+
+ if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
+ !cursor->entry->bo)
+ return false;
+
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
+ shift = amdgpu_vm_pt_level_shift(adev, cursor->level);
+
+ ++cursor->level;
+ idx = (cursor->pfn >> shift) & mask;
+ cursor->parent = cursor->entry;
+ cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_sibling - go to sibling node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the sibling node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+
+ unsigned int shift, num_entries;
+ struct amdgpu_bo_vm *parent;
+
+ /* Root doesn't have a sibling */
+ if (!cursor->parent)
+ return false;
+
+ /* Go to our parents and see if we got a sibling */
+ shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
+ num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
+ parent = to_amdgpu_bo_vm(cursor->parent->bo);
+
+ if (cursor->entry == &parent->entries[num_entries - 1])
+ return false;
+
+ cursor->pfn += 1ULL << shift;
+ cursor->pfn &= ~((1ULL << shift) - 1);
+ ++cursor->entry;
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_ancestor - go to parent node
+ *
+ * @cursor: current state
+ *
+ * Walk to the parent node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->parent)
+ return false;
+
+ --cursor->level;
+ cursor->entry = cursor->parent;
+ cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_next - get next PD/PT in hieratchy
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next node.
+ */
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ /* First try a newborn child */
+ if (amdgpu_vm_pt_descendant(adev, cursor))
+ return;
+
+ /* If that didn't worked try to find a sibling */
+ while (!amdgpu_vm_pt_sibling(adev, cursor)) {
+ /* No sibling, go to our parents and grandparents */
+ if (!amdgpu_vm_pt_ancestor(cursor)) {
+ cursor->pfn = ~0ll;
+ return;
+ }
+ }
+}
+
+/**
+ * amdgpu_vm_pt_first_dfs - start a deep first search
+ *
+ * @adev: amdgpu_device structure
+ * @vm: amdgpu_vm structure
+ * @start: optional cursor to start with
+ * @cursor: state to initialize
+ *
+ * Starts a deep first traversal of the PD/PT tree.
+ */
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (start)
+ *cursor = *start;
+ else
+ amdgpu_vm_pt_start(adev, vm, 0, cursor);
+
+ while (amdgpu_vm_pt_descendant(adev, cursor))
+ ;
+}
+
+/**
+ * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue
+ *
+ * @start: starting point for the search
+ * @entry: current entry
+ *
+ * Returns:
+ * True when the search should continue, false otherwise.
+ */
+static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
+ struct amdgpu_vm_bo_base *entry)
+{
+ return entry && (!start || entry != start->entry);
+}
+
+/**
+ * amdgpu_vm_pt_next_dfs - get the next node for a deep first search
+ *
+ * @adev: amdgpu_device structure
+ * @cursor: current state
+ *
+ * Move the cursor to the next node in a deep first search.
+ */
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->entry)
+ return;
+
+ if (!cursor->parent)
+ cursor->entry = NULL;
+ else if (amdgpu_vm_pt_sibling(adev, cursor))
+ while (amdgpu_vm_pt_descendant(adev, cursor))
+ ;
+ else
+ amdgpu_vm_pt_ancestor(cursor);
+}
+
+/*
+ * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
+ */
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
+ for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
+ amdgpu_vm_pt_continue_dfs((start), (entry)); \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
+
+/**
+ * amdgpu_vm_pt_clear - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to clear BO from
+ * @vmbo: BO to clear
+ * @immediate: use an immediate update
+ *
+ * Root PD needs to be reserved when calling this.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate)
+{
+ unsigned int level = adev->vm_manager.root_level;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_vm_update_params params;
+ struct amdgpu_bo *ancestor = &vmbo->bo;
+ unsigned int entries;
+ struct amdgpu_bo *bo = &vmbo->bo;
+ uint64_t addr;
+ int r, idx;
+
+ /* Figure out our place in the hierarchy */
+ if (ancestor->parent) {
+ ++level;
+ while (ancestor->parent->parent) {
+ ++level;
+ ancestor = ancestor->parent;
+ }
+ }
+
+ entries = amdgpu_bo_size(bo) / 8;
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ return r;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ r = vm->update_funcs->map_table(vmbo);
+ if (r)
+ goto exit;
+
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.vm = vm;
+ params.immediate = immediate;
+
+ r = vm->update_funcs->prepare(&params, NULL,
+ AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR);
+ if (r)
+ goto exit;
+
+ addr = 0;
+
+ uint64_t value = 0, flags = 0;
+ if (adev->asic_type >= CHIP_VEGA10) {
+ if (level != AMDGPU_VM_PTB) {
+ /* Handle leaf PDEs as PTEs */
+ flags |= AMDGPU_PDE_PTE_FLAG(adev);
+ amdgpu_gmc_get_vm_pde(adev, level,
+ &value, &flags);
+ } else {
+ /* Workaround for fault priority problem on GMC9 */
+ flags = AMDGPU_PTE_EXECUTABLE;
+ }
+ }
+
+ r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
+ value, flags);
+ if (r)
+ goto exit;
+
+ r = vm->update_funcs->commit(&params, NULL);
+exit:
+ drm_dev_exit(idx);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_create - create bo for PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requesting vm
+ * @level: the page table level
+ * @immediate: use a immediate update
+ * @vmbo: pointer to the buffer object pointer
+ * @xcp_id: GPU partition id
+ */
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id)
+{
+ struct amdgpu_bo_param bp;
+ unsigned int num_entries;
+
+ memset(&bp, 0, sizeof(bp));
+
+ bp.size = amdgpu_vm_pt_size(adev, level);
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+
+ if (!adev->gmc.is_app_apu)
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ else
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+
+ bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ if (level < AMDGPU_VM_PTB)
+ num_entries = amdgpu_vm_pt_num_entries(adev, level);
+ else
+ num_entries = 0;
+
+ bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
+
+ if (vm->use_cpu_for_update)
+ bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ bp.type = ttm_bo_type_kernel;
+ bp.no_wait_gpu = immediate;
+ bp.xcp_id_plus1 = xcp_id + 1;
+
+ if (vm->root.bo)
+ bp.resv = vm->root.bo->tbo.base.resv;
+
+ return amdgpu_bo_create_vm(adev, &bp, vmbo);
+}
+
+/**
+ * amdgpu_vm_pt_alloc - Allocate a specific page table
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to allocate page tables for
+ * @cursor: Which page table to allocate
+ * @immediate: use an immediate update
+ *
+ * Make sure a specific page table or directory is allocated.
+ *
+ * Returns:
+ * 1 if page table needed to be allocated, 0 if page table was already
+ * allocated, negative errno if an error occurred.
+ */
+static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *cursor,
+ bool immediate)
+{
+ struct amdgpu_vm_bo_base *entry = cursor->entry;
+ struct amdgpu_bo *pt_bo;
+ struct amdgpu_bo_vm *pt;
+ int r;
+
+ if (entry->bo)
+ return 0;
+
+ amdgpu_vm_eviction_unlock(vm);
+ r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
+ vm->root.bo->xcp_id);
+ amdgpu_vm_eviction_lock(vm);
+ if (r)
+ return r;
+
+ /* Keep a reference to the root directory to avoid
+ * freeing them up in the wrong order.
+ */
+ pt_bo = &pt->bo;
+ pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
+ amdgpu_vm_bo_base_init(entry, vm, pt_bo);
+ r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
+ if (r)
+ goto error_free_pt;
+
+ return 0;
+
+error_free_pt:
+ amdgpu_bo_unref(&pt_bo);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_free - free one PD/PT
+ *
+ * @entry: PDE to free
+ */
+static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+{
+ if (!entry->bo)
+ return;
+
+ amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
+ entry->bo->vm_bo = NULL;
+ ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
+
+ spin_lock(&entry->vm->status_lock);
+ list_del(&entry->vm_status);
+ spin_unlock(&entry->vm->status_lock);
+ amdgpu_bo_unref(&entry->bo);
+}
+
+/**
+ * amdgpu_vm_pt_free_list - free PD/PT levels
+ *
+ * @adev: amdgpu device structure
+ * @params: see amdgpu_vm_update_params definition
+ *
+ * Free the page directory objects saved in the flush list
+ */
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params)
+{
+ struct amdgpu_vm_bo_base *entry, *next;
+ bool unlocked = params->unlocked;
+
+ if (list_empty(&params->tlb_flush_waitlist))
+ return;
+
+ /*
+ * unlocked unmap clear page table leaves, warning to free the page entry.
+ */
+ WARN_ON(unlocked);
+
+ list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
+ amdgpu_vm_pt_free(entry);
+}
+
+/**
+ * amdgpu_vm_pt_add_list - add PD/PT level to the flush list
+ *
+ * @params: parameters for the update
+ * @cursor: first PT entry to start DF search from, non NULL
+ *
+ * This list will be freed after TLB flush.
+ */
+static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ struct amdgpu_vm_pt_cursor seek;
+ struct amdgpu_vm_bo_base *entry;
+
+ spin_lock(&params->vm->status_lock);
+ for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
+ if (entry && entry->bo)
+ list_move(&entry->vm_status, &params->tlb_flush_waitlist);
+ }
+
+ /* enter start node now */
+ list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
+ spin_unlock(&params->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_pt_free_root - free root PD
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * Free the root page directory and everything below it.
+ */
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+ if (entry)
+ amdgpu_vm_pt_free(entry);
+ }
+}
+
+/**
+ * amdgpu_vm_pde_update - update a single level in the hierarchy
+ *
+ * @params: parameters for the update
+ * @entry: entry to update
+ *
+ * Makes sure the requested entry in parent is up to date.
+ */
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry)
+{
+ struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
+ struct amdgpu_bo *bo, *pbo;
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t pde, pt, flags;
+ unsigned int level;
+
+ if (WARN_ON(!parent))
+ return -EINVAL;
+
+ bo = parent->bo;
+ for (level = 0, pbo = bo->parent; pbo; ++level)
+ pbo = pbo->parent;
+
+ level += params->adev->vm_manager.root_level;
+ amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
+ pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
+ return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
+ 1, 0, flags);
+}
+
+/**
+ * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: pointer to PTE flags
+ *
+ * Update PTE no-retry flags when TF is enabled.
+ */
+static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
+ uint64_t *flags)
+{
+ /*
+ * Update no-retry flags with the corresponding TF
+ * no-retry combination.
+ */
+ if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
+ *flags &= ~AMDGPU_VM_NORETRY_FLAGS;
+ *flags |= adev->gmc.noretry_flags;
+ }
+}
+
+/*
+ * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
+ *
+ * Make sure to set the right flags for the PTEs at the desired level.
+ */
+static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
+ struct amdgpu_bo_vm *pt,
+ unsigned int level,
+ uint64_t pe, uint64_t addr,
+ unsigned int count, uint32_t incr,
+ uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+
+ if (level != AMDGPU_VM_PTB) {
+ flags |= AMDGPU_PDE_PTE_FLAG(params->adev);
+ amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
+
+ } else if (adev->asic_type >= CHIP_VEGA10 &&
+ !(flags & AMDGPU_PTE_VALID) &&
+ !(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) {
+
+ /* Workaround for fault priority problem on GMC9 */
+ flags |= AMDGPU_PTE_EXECUTABLE;
+ }
+
+ /*
+ * Update no-retry flags to use the no-retry flag combination
+ * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
+ * does not work when TF is enabled. So, replace them with
+ * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
+ * all cases.
+ */
+ if (level == AMDGPU_VM_PTB)
+ amdgpu_vm_pte_update_noretry_flags(adev, &flags);
+
+ /* APUs mapping system memory may need different MTYPEs on different
+ * NUMA nodes. Only do this for contiguous ranges that can be assumed
+ * to be on the same NUMA node.
+ */
+ if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
+ adev->gmc.gmc_funcs->override_vm_pte_flags &&
+ num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
+ amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);
+
+ params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
+ flags);
+}
+
+/**
+ * amdgpu_vm_pte_fragment - get fragment for PTEs
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @flags: hw mapping flags
+ * @frag: resulting fragment size
+ * @frag_end: end of this fragment
+ *
+ * Returns the first possible fragment for the start and end address.
+ */
+static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end, uint64_t flags,
+ unsigned int *frag, uint64_t *frag_end)
+{
+ /**
+ * The MC L1 TLB supports variable sized pages, based on a fragment
+ * field in the PTE. When this field is set to a non-zero value, page
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+ * flags are considered valid for all PTEs within the fragment range
+ * and corresponding mappings are assumed to be physically contiguous.
+ *
+ * The L1 TLB can store a single PTE for the whole fragment,
+ * significantly increasing the space available for translation
+ * caching. This leads to large improvements in throughput when the
+ * TLB is under pressure.
+ *
+ * The L2 TLB distributes small and large fragments into two
+ * asymmetric partitions. The large fragment cache is significantly
+ * larger. Thus, we try to use large fragments wherever possible.
+ * Userspace can support this by aligning virtual base address and
+ * allocation size to the fragment size.
+ *
+ * Starting with Vega10 the fragment size only controls the L1. The L2
+ * is now directly feed with small/huge/giant pages from the walker.
+ */
+ unsigned int max_frag;
+
+ if (params->adev->asic_type < CHIP_VEGA10)
+ max_frag = params->adev->vm_manager.fragment_size;
+ else
+ max_frag = 31;
+
+ /* system pages are non continuously */
+ if (params->pages_addr) {
+ *frag = 0;
+ *frag_end = end;
+ return;
+ }
+
+ /* This intentionally wraps around if no bit is set */
+ *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
+ if (*frag >= max_frag) {
+ *frag = max_frag;
+ *frag_end = end & ~((1ULL << max_frag) - 1);
+ } else {
+ *frag_end = start + (1 << *frag);
+ }
+}
+
+/**
+ * amdgpu_vm_ptes_update - make sure that page tables are valid
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to, the next dst inside the function
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
+ */
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+ struct amdgpu_vm_pt_cursor cursor;
+ uint64_t frag_start = start, frag_end;
+ unsigned int frag;
+ int r;
+
+ /* figure out the initial fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
+ &frag_end);
+
+ /* walk over the address space and update the PTs */
+ amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
+ while (cursor.pfn < end) {
+ unsigned int shift, parent_shift, mask;
+ uint64_t incr, entry_end, pe_start;
+ struct amdgpu_bo *pt;
+
+ if (!params->unlocked) {
+ /* make sure that the page tables covering the
+ * address range are actually allocated
+ */
+ r = amdgpu_vm_pt_alloc(params->adev, params->vm,
+ &cursor, params->immediate);
+ if (r)
+ return r;
+ }
+
+ shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
+ parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
+ if (params->unlocked) {
+ /* Unlocked updates are only allowed on the leaves */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (adev->asic_type < CHIP_VEGA10 &&
+ (flags & AMDGPU_PTE_VALID)) {
+ /* No huge page support before GMC v9 */
+ if (cursor.level != AMDGPU_VM_PTB) {
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
+ return -ENOENT;
+ continue;
+ }
+ } else if (frag < shift) {
+ /* We can't use this level when the fragment size is
+ * smaller than the address shift. Go to the next
+ * child entry and try again.
+ */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (frag >= parent_shift) {
+ /* If the fragment size is even larger than the parent
+ * shift we should go up one level and check it again.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+ continue;
+ }
+
+ pt = cursor.entry->bo;
+ if (!pt) {
+ /* We need all PDs and PTs for mapping something, */
+ if (flags & AMDGPU_PTE_VALID)
+ return -ENOENT;
+
+ /* but unmapping something can happen at a higher
+ * level.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+
+ pt = cursor.entry->bo;
+ shift = parent_shift;
+ frag_end = max(frag_end, ALIGN(frag_start + 1,
+ 1ULL << shift));
+ }
+
+ /* Looks good so far, calculate parameters for the update */
+ incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
+ pe_start = ((cursor.pfn >> shift) & mask) * 8;
+
+ if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
+ /*
+ * MMU notifier callback unlocked unmap huge page, leave is PDE entry,
+ * only clear one entry. Next entry search again for PDE or PTE leave.
+ */
+ entry_end = 1ULL << shift;
+ else
+ entry_end = ((uint64_t)mask + 1) << shift;
+ entry_end += cursor.pfn & ~(entry_end - 1);
+ entry_end = min(entry_end, end);
+
+ do {
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t upd_end = min(entry_end, frag_end);
+ unsigned int nptes = (upd_end - frag_start) >> shift;
+ uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
+
+ /* This can happen when we set higher level PDs to
+ * silent to stop fault floods.
+ */
+ nptes = max(nptes, 1u);
+
+ trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
+ min(nptes, 32u), dst, incr,
+ upd_flags,
+ vm->task_info ? vm->task_info->tgid : 0,
+ vm->immediate.fence_context);
+ amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
+ cursor.level, pe_start, dst,
+ nptes, incr, upd_flags);
+
+ pe_start += nptes * 8;
+ dst += nptes * incr;
+
+ frag_start = upd_end;
+ if (frag_start >= frag_end) {
+ /* figure out the next fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end,
+ flags, &frag, &frag_end);
+ if (frag < shift)
+ break;
+ }
+ } while (frag_start < entry_end);
+
+ if (amdgpu_vm_pt_descendant(adev, &cursor)) {
+ /* Free all child entries.
+ * Update the tables with the flags and addresses and free up subsequent
+ * tables in the case of huge pages or freed up areas.
+ * This is the maximum you can free, because all other page tables are not
+ * completely covered by the range and so potentially still in use.
+ */
+ while (cursor.pfn < frag_start) {
+ /* Make sure previous mapping is freed */
+ if (cursor.entry->bo) {
+ params->needs_flush = true;
+ amdgpu_vm_pt_add_list(params, &cursor);
+ }
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+
+ } else if (frag >= shift) {
+ /* or just move on to the next on the same level. */
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_pt_map_tables - have bo of root PD cpu accessible
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * make root page directory and everything below it cpu accessible.
+ */
+int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+
+ struct amdgpu_bo_vm *bo;
+ int r;
+
+ if (entry->bo) {
+ bo = to_amdgpu_bo_vm(entry->bo);
+ r = vm->update_funcs->map_table(bo);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index a83a646759c5..36805dcfa159 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -33,49 +33,65 @@
*
* @table: newly allocated or validated PD/PT
*/
-static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table)
+static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
+ return amdgpu_ttm_alloc_gart(&table->bo.tbo);
+}
+
+/* Allocate a new job for @count PTE updates */
+static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
+ unsigned int count, u64 k_job_id)
+{
+ enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+ : AMDGPU_IB_POOL_DELAYED;
+ struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate
+ : &p->vm->delayed;
+ unsigned int ndw;
int r;
- r = amdgpu_ttm_alloc_gart(&table->tbo);
+ /* estimate how many dw we need */
+ ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
+ if (p->pages_addr)
+ ndw += count * 2;
+ ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
+
+ r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM,
+ ndw * 4, pool, &p->job, k_job_id);
if (r)
return r;
- if (table->shadow)
- r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);
-
- return r;
+ p->num_dw_left = ndw;
+ return 0;
}
/**
* amdgpu_vm_sdma_prepare - prepare SDMA command submission
*
* @p: see amdgpu_vm_update_params definition
- * @resv: reservation object with embedded fence
- * @sync_mode: synchronization mode
+ * @sync: amdgpu_sync object with fences to wait for
+ * @k_job_id: identifier of the job, for tracing purpose
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
- struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode)
+ struct amdgpu_sync *sync, u64 k_job_id)
{
- enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
- : AMDGPU_IB_POOL_DELAYED;
- unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
int r;
- r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job);
+ r = amdgpu_vm_sdma_alloc_job(p, 0, k_job_id);
if (r)
return r;
- p->num_dw_left = ndw;
-
- if (!resv)
+ if (!sync)
return 0;
- return amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, p->vm);
+ r = amdgpu_sync_push_to_job(sync, p->job);
+ if (r) {
+ p->num_dw_left = 0;
+ amdgpu_job_free(p->job);
+ }
+ return r;
}
/**
@@ -91,38 +107,42 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
struct dma_fence **fence)
{
struct amdgpu_ib *ib = p->job->ibs;
- struct drm_sched_entity *entity;
struct amdgpu_ring *ring;
struct dma_fence *f;
- int r;
- entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
- ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
+ ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring,
+ sched);
WARN_ON(ib->length_dw == 0);
amdgpu_ring_pad_ib(ring, ib);
+
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
WARN_ON(ib->length_dw > p->num_dw_left);
- r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
- if (r)
- goto error;
+ f = amdgpu_job_submit(p->job);
if (p->unlocked) {
struct dma_fence *tmp = dma_fence_get(f);
- swap(p->vm->last_unlocked, f);
+ swap(p->vm->last_unlocked, tmp);
dma_fence_put(tmp);
} else {
- amdgpu_bo_fence(p->vm->root.base.bo, f, true);
+ dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f,
+ DMA_RESV_USAGE_BOOKKEEP);
}
- if (fence && !p->immediate)
+ if (fence && !p->immediate) {
+ /*
+ * Most hw generations now have a separate queue for page table
+ * updates, but when the queue is shared with userspace we need
+ * the extra CPU round trip to correctly flush the TLB.
+ */
+ set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
swap(*fence, f);
+ }
dma_fence_put(f);
return 0;
-
-error:
- amdgpu_job_free(p->job);
- return r;
}
/**
@@ -144,7 +164,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
src += p->num_dw_left * 4;
- pe += amdgpu_gmc_sign_extend(amdgpu_bo_gpu_offset_no_check(bo));
+ pe += amdgpu_bo_gpu_offset_no_check(bo);
trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);
amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
@@ -171,7 +191,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
{
struct amdgpu_ib *ib = p->job->ibs;
- pe += amdgpu_gmc_sign_extend(amdgpu_bo_gpu_offset_no_check(bo));
+ pe += amdgpu_bo_gpu_offset_no_check(bo);
trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
if (count < 3) {
amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
@@ -186,7 +206,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
* amdgpu_vm_sdma_update - execute VM update
*
* @p: see amdgpu_vm_update_params definition
- * @bo: PD/PT to update
+ * @vmbo: PD/PT to update
* @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
* @addr: dst addr to write into pe
* @count: number of page entries to update
@@ -197,20 +217,29 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
* the IB.
*/
static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
- struct amdgpu_bo *bo, uint64_t pe,
+ struct amdgpu_bo_vm *vmbo, uint64_t pe,
uint64_t addr, unsigned count, uint32_t incr,
uint64_t flags)
{
- enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
- : AMDGPU_IB_POOL_DELAYED;
+ struct amdgpu_bo *bo = &vmbo->bo;
+ struct dma_resv_iter cursor;
unsigned int i, ndw, nptes;
+ struct dma_fence *fence;
uint64_t *pte;
int r;
/* Wait for PD/PT moves to be completed */
- r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
- if (r)
- return r;
+ dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ dma_fence_get(fence);
+ r = drm_sched_job_add_dependency(&p->job->base, fence);
+ if (r) {
+ dma_fence_put(fence);
+ dma_resv_iter_end(&cursor);
+ return r;
+ }
+ }
+ dma_resv_iter_end(&cursor);
do {
ndw = p->num_dw_left;
@@ -221,34 +250,21 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
if (r)
return r;
- /* estimate how many dw we need */
- ndw = 32;
- if (p->pages_addr)
- ndw += count * 2;
- ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
- ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
-
- r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool,
- &p->job);
+ r = amdgpu_vm_sdma_alloc_job(p, count,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE);
if (r)
return r;
-
- p->num_dw_left = ndw;
}
if (!p->pages_addr) {
/* set page commands needed */
- if (bo->shadow)
- amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr,
- count, incr, flags);
amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
incr, flags);
return 0;
}
/* copy commands needed */
- ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
- (bo->shadow ? 2 : 1);
+ ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
/* for padding */
ndw -= 7;
@@ -263,8 +279,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
pte[i] |= flags;
}
- if (bo->shadow)
- amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes);
amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
pe += nptes * 8;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index 000000000000..5d26797356a3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gmc.h"
+
+struct amdgpu_tlb_fence {
+ struct dma_fence base;
+ struct amdgpu_device *adev;
+ struct dma_fence *dependency;
+ struct work_struct work;
+ spinlock_t lock;
+ uint16_t pasid;
+
+};
+
+static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu tlb fence";
+}
+
+static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f)
+{
+ return "amdgpu tlb timeline";
+}
+
+static void amdgpu_tlb_fence_work(struct work_struct *work)
+{
+ struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
+ int r;
+
+ if (f->dependency) {
+ dma_fence_wait(f->dependency, false);
+ dma_fence_put(f->dependency);
+ f->dependency = NULL;
+ }
+
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
+ if (r) {
+ dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
+ f->pasid);
+ dma_fence_set_error(&f->base, r);
+ }
+
+ dma_fence_signal(&f->base);
+ dma_fence_put(&f->base);
+}
+
+static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
+ .get_driver_name = amdgpu_tlb_fence_get_driver_name,
+ .get_timeline_name = amdgpu_tlb_fence_get_timeline_name
+};
+
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct dma_fence **fence)
+{
+ struct amdgpu_tlb_fence *f;
+
+ f = kmalloc(sizeof(*f), GFP_KERNEL);
+ if (!f) {
+ /*
+ * We can't fail since the PDEs and PTEs are already updated, so
+ * just block for the dependency and execute the TLB flush
+ */
+ if (*fence)
+ dma_fence_wait(*fence, false);
+
+ amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
+ *fence = dma_fence_get_stub();
+ return;
+ }
+
+ f->adev = adev;
+ f->dependency = *fence;
+ f->pasid = vm->pasid;
+ INIT_WORK(&f->work, amdgpu_tlb_fence_work);
+ spin_lock_init(&f->lock);
+
+ dma_fence_init64(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
+ vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
+
+ /* TODO: We probably need a separate wq here */
+ dma_fence_get(&f->base);
+ schedule_work(&f->work);
+
+ *fence = &f->base;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
new file mode 100644
index 000000000000..aa78c2ee9e21
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -0,0 +1,1018 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_vpe.h"
+#include "amdgpu_smu.h"
+#include "soc15_common.h"
+#include "vpe_v6_1.h"
+
+#define AMDGPU_CSA_VPE_SIZE 64
+/* VPE CSA resides in the 4th page of CSA */
+#define AMDGPU_CSA_VPE_OFFSET (4096 * 3)
+
+/* 1 second timeout */
+#define VPE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+#define VPE_MAX_DPM_LEVEL 4
+#define FIXED1_8_BITS_PER_FRACTIONAL_PART 8
+#define GET_PRATIO_INTEGER_PART(x) ((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART)
+
+static void vpe_set_ring_funcs(struct amdgpu_device *adev);
+
+static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+static inline uint16_t complete_integer_division_u16(
+ uint16_t dividend,
+ uint16_t divisor,
+ uint16_t *remainder)
+{
+ return div16_u16_rem(dividend, divisor, (uint16_t *)remainder);
+}
+
+static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
+{
+ u16 arg1_value = numerator;
+ u16 arg2_value = denominator;
+
+ uint16_t remainder;
+
+ /* determine integer part */
+ uint16_t res_value = complete_integer_division_u16(
+ arg1_value, arg2_value, &remainder);
+
+ if (res_value > 127 /* CHAR_MAX */)
+ return 0;
+
+ /* determine fractional part */
+ {
+ unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART;
+
+ do {
+ remainder <<= 1;
+
+ res_value <<= 1;
+
+ if (remainder >= arg2_value) {
+ res_value |= 1;
+ remainder -= arg2_value;
+ }
+ } while (--i != 0);
+ }
+
+ /* round up LSB */
+ {
+ uint16_t summand = (remainder << 1) >= arg2_value;
+
+ if ((res_value + summand) > 32767 /* SHRT_MAX */)
+ return 0;
+
+ res_value += summand;
+ }
+
+ return res_value;
+}
+
+static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency)
+{
+ uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency);
+
+ if (GET_PRATIO_INTEGER_PART(pratio) > 1)
+ pratio = 0;
+
+ return pratio;
+}
+
+/*
+ * VPE has 4 DPM levels from level 0 (lowerest) to 3 (highest),
+ * VPE FW will dynamically decide which level should be used according to current loading.
+ *
+ * Get VPE and SOC clocks from PM, and select the appropriate four clock values,
+ * calculate the ratios of adjusting from one clock to another.
+ * The VPE FW can then request the appropriate frequency from the PMFW.
+ */
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ uint32_t dpm_ctl;
+
+ if (adev->pm.dpm_enabled) {
+ struct dpm_clocks clock_table = { 0 };
+ struct dpm_clock *VPEClks;
+ struct dpm_clock *SOCClks;
+ uint32_t idx;
+ uint32_t vpeclk_enalbled_num = 0;
+ uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
+ uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;
+
+ dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+ dpm_ctl |= 1; /* DPM enablement */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+
+ /* Get VPECLK and SOCCLK */
+ if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) {
+ dev_dbg(adev->dev, "%s: get clock failed!\n", __func__);
+ goto disable_dpm;
+ }
+
+ SOCClks = clock_table.SocClocks;
+ VPEClks = clock_table.VPEClocks;
+
+ /* Comfirm enabled vpe clk num
+ * Enabled VPE clocks are ordered from low to high in VPEClks
+ * The highest valid clock index+1 is the number of VPEClks
+ */
+ for (idx = PP_SMU_NUM_VPECLK_DPM_LEVELS; idx && !vpeclk_enalbled_num; idx--)
+ if (VPEClks[idx-1].Freq)
+ vpeclk_enalbled_num = idx;
+
+ /* vpe dpm only cares 4 levels. */
+ for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
+ uint32_t soc_dpm_level;
+ uint32_t min_freq;
+
+ if (idx == 0)
+ soc_dpm_level = 0;
+ else
+ soc_dpm_level = (idx * 2) + 1;
+
+ /* clamp the max level */
+ if (soc_dpm_level > vpeclk_enalbled_num - 1)
+ soc_dpm_level = vpeclk_enalbled_num - 1;
+
+ min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
+ SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;
+
+ switch (idx) {
+ case 0:
+ pratio_vmin_freq = min_freq;
+ break;
+ case 1:
+ pratio_vmid_freq = min_freq;
+ break;
+ case 2:
+ pratio_vnorm_freq = min_freq;
+ break;
+ case 3:
+ pratio_vmax_freq = min_freq;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) {
+ uint32_t pratio_ctl;
+
+ pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq);
+ pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq);
+ pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq);
+
+ pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18);
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl); /* PRatio */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000); /* 1ms, unit=1/24MHz */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000); /* 50ms */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */
+ dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__);
+ } else {
+ dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__);
+ goto disable_dpm;
+ }
+ }
+ return 0;
+
+disable_dpm:
+ dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+ dpm_ctl &= 0xfffffffe; /* Disable DPM */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+ dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
+ return -EINVAL;
+}
+
+int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = AMDGPU_UCODE_ID_VPE,
+ .mc_addr = adev->vpe.cmdbuf_gpu_addr,
+ .ucode_size = 8,
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ const struct vpe_firmware_header_v1_0 *vpe_hdr;
+ char fw_prefix[32];
+ int ret;
+
+ amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
+ ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", fw_prefix);
+ if (ret)
+ goto out;
+
+ vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
+ adev->vpe.fw_version = le32_to_cpu(vpe_hdr->header.ucode_version);
+ adev->vpe.feature_version = le32_to_cpu(vpe_hdr->ucode_feature_version);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ struct amdgpu_firmware_info *info;
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTX];
+ info->ucode_id = AMDGPU_UCODE_ID_VPE_CTX;
+ info->fw = adev->vpe.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTL];
+ info->ucode_id = AMDGPU_UCODE_ID_VPE_CTL;
+ info->fw = adev->vpe.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+ }
+
+ return 0;
+out:
+ dev_err(adev->dev, "fail to initialize vpe microcode\n");
+ release_firmware(adev->vpe.fw);
+ adev->vpe.fw = NULL;
+ return ret;
+}
+
+int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+ struct amdgpu_ring *ring = &vpe->ring;
+ int ret;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ ring->doorbell_index = (adev->doorbell_index.vpe_ring << 1);
+ snprintf(ring->name, 4, "vpe");
+
+ ret = amdgpu_ring_init(adev, ring, 1024, &vpe->trap_irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe)
+{
+ amdgpu_ring_fini(&vpe->ring);
+
+ return 0;
+}
+
+static int vpe_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 3):
+ vpe_v6_1_set_funcs(vpe);
+ break;
+ case IP_VERSION(6, 1, 1):
+ vpe_v6_1_set_funcs(vpe);
+ vpe->collaborate_mode = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ vpe_set_ring_funcs(adev);
+ vpe_set_regs(vpe);
+
+ dev_info(adev->dev, "VPE: collaborate mode %s", vpe->collaborate_mode ? "true" : "false");
+
+ return 0;
+}
+
+static bool vpe_need_dpm0_at_power_down(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 1):
+ return adev->pm.fw_version < 0x0a640500;
+ default:
+ return false;
+ }
+}
+
+static int vpe_get_dpm_level(struct amdgpu_device *adev)
+{
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ if (!adev->pm.dpm_enabled)
+ return 0;
+
+ return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv));
+}
+
+static void vpe_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vpe.idle_work.work);
+ unsigned int fences = 0;
+
+ fences += amdgpu_fence_count_emitted(&adev->vpe.ring);
+ if (fences)
+ goto reschedule;
+
+ if (vpe_need_dpm0_at_power_down(adev) && vpe_get_dpm_level(adev) != 0)
+ goto reschedule;
+
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+ return;
+
+reschedule:
+ schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
+
+static int vpe_common_init(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+ int r;
+
+ r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vpe.cmdbuf_obj,
+ &adev->vpe.cmdbuf_gpu_addr,
+ (void **)&adev->vpe.cmdbuf_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VPE: failed to allocate cmdbuf bo %d\n", r);
+ return r;
+ }
+
+ vpe->context_started = false;
+ INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler);
+
+ return 0;
+}
+
+static int vpe_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ int ret;
+
+ ret = vpe_common_init(vpe);
+ if (ret)
+ goto out;
+
+ ret = vpe_irq_init(vpe);
+ if (ret)
+ goto out;
+
+ ret = vpe_ring_init(vpe);
+ if (ret)
+ goto out;
+
+ ret = vpe_init_microcode(vpe);
+ if (ret)
+ goto out;
+
+ adev->vpe.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vpe.ring);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vpe.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ ret = amdgpu_vpe_sysfs_reset_mask_init(adev);
+ if (ret)
+ goto out;
+out:
+ return ret;
+}
+
+static int vpe_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ release_firmware(vpe->fw);
+ vpe->fw = NULL;
+
+ amdgpu_vpe_sysfs_reset_mask_fini(adev);
+ vpe_ring_fini(vpe);
+
+ amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj,
+ &adev->vpe.cmdbuf_gpu_addr,
+ (void **)&adev->vpe.cmdbuf_cpu_addr);
+
+ return 0;
+}
+
+static int vpe_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ int ret;
+
+ /* Power on VPE */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_UNGATE);
+ if (ret)
+ return ret;
+
+ ret = vpe_load_microcode(vpe);
+ if (ret)
+ return ret;
+
+ ret = vpe_ring_start(vpe);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int vpe_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+ vpe_ring_stop(vpe);
+
+ /* Power off VPE */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+static int vpe_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return vpe_hw_fini(ip_block);
+}
+
+static int vpe_resume(struct amdgpu_ip_block *ip_block)
+{
+ return vpe_hw_init(ip_block);
+}
+
+static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (i == 0)
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ VPE_CMD_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t index = 0;
+ uint64_t csa_mc_addr;
+
+ if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
+ return 0;
+
+ csa_mc_addr = amdgpu_csa_vaddr(adev) + AMDGPU_CSA_VPE_OFFSET +
+ index * AMDGPU_CSA_VPE_SIZE;
+
+ return csa_mc_addr;
+}
+
+static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring,
+ uint32_t device_select,
+ uint32_t exec_count)
+{
+ if (!ring->adev->vpe.collaborate_mode)
+ return;
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) |
+ (device_select << 16));
+ amdgpu_ring_write(ring, exec_count & 0x1fff);
+}
+
+static void vpe_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = vpe_get_csa_mc_addr(ring, vmid);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0) |
+ VPE_CMD_INDIRECT_HEADER_VMID(vmid & 0xf));
+
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
+ uint64_t seq, unsigned int flags)
+{
+ int i = 0;
+
+ do {
+ /* write the fence */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
+ /* zero in first two bits */
+ WARN_ON_ONCE(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, i == 0 ? lower_32_bits(seq) : upper_32_bits(seq));
+ addr += 4;
+ } while ((flags & AMDGPU_FENCE_FLAG_64BIT) && (i++ < 1));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_TRAP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+
+}
+
+static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ vpe_ring_emit_pred_exec(ring, 0, 6);
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
+ VPE_POLL_REGMEM_SUBOP_REGMEM) |
+ VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ VPE_CMD_POLL_REGMEM_HEADER_MEM(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ VPE_CMD_POLL_REGMEM_DW5_INTERVAL(4));
+}
+
+static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ vpe_ring_emit_pred_exec(ring, 0, 3);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ vpe_ring_emit_pred_exec(ring, 0, 6);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
+ VPE_POLL_REGMEM_SUBOP_REGMEM) |
+ VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ VPE_CMD_POLL_REGMEM_HEADER_MEM(0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ VPE_CMD_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
+ uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned int ret;
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 1);
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ uint32_t preempt_reg = vpe->regs.queue0_preempt;
+ int i, r = 0;
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 10);
+ vpe_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ dev_err(adev->dev, "ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+
+ return r;
+}
+
+static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ if (!adev->pm.dpm_enabled)
+ dev_err(adev->dev, "Without PM, cannot support powergating\n");
+
+ dev_dbg(adev->dev, "%s: %s!\n", __func__, (state == AMD_PG_STATE_GATE) ? "GATE":"UNGATE");
+
+ if (state == AMD_PG_STATE_GATE) {
+ amdgpu_dpm_enable_vpe(adev, false);
+ vpe->context_started = false;
+ } else {
+ amdgpu_dpm_enable_vpe(adev, true);
+ }
+
+ return 0;
+}
+
+static uint64_t vpe_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ uint64_t rptr;
+
+ if (ring->use_doorbell) {
+ rptr = atomic64_read((atomic64_t *)ring->rptr_cpu_addr);
+ dev_dbg(adev->dev, "rptr/doorbell before shift == 0x%016llx\n", rptr);
+ } else {
+ rptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_hi));
+ rptr = rptr << 32;
+ rptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_lo));
+ dev_dbg(adev->dev, "rptr before shift [%i] == 0x%016llx\n", ring->me, rptr);
+ }
+
+ return (rptr >> 2);
+}
+
+static uint64_t vpe_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ uint64_t wptr;
+
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ dev_dbg(adev->dev, "wptr/doorbell before shift == 0x%016llx\n", wptr);
+ } else {
+ wptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi));
+ wptr = wptr << 32;
+ wptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo));
+ dev_dbg(adev->dev, "wptr before shift [%i] == 0x%016llx\n", ring->me, wptr);
+ }
+
+ return (wptr >> 2);
+}
+
+static void vpe_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ if (ring->use_doorbell) {
+ dev_dbg(adev->dev, "Using doorbell, \
+ wptr_offs == 0x%08x, \
+ lower_32_bits(ring->wptr) << 2 == 0x%08x, \
+ upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ if (vpe->collaborate_mode)
+ WDOORBELL64(ring->doorbell_index + 4, ring->wptr << 2);
+ } else {
+ int i;
+
+ for (i = 0; i < vpe->num_instances; i++) {
+ dev_dbg(adev->dev, "Not using doorbell, \
+ regVPEC_QUEUE0_RB_WPTR == 0x%08x, \
+ regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n",
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi),
+ upper_32_bits(ring->wptr << 2));
+ }
+ }
+}
+
+static int vpe_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ const uint32_t test_pattern = 0xdeadbeef;
+ uint32_t index, i;
+ uint64_t wb_addr;
+ int ret;
+
+ ret = amdgpu_device_wb_get(adev, &index);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
+ return ret;
+ }
+
+ adev->wb.wb[index] = 0;
+ wb_addr = adev->wb.gpu_addr + (index * 4);
+
+ ret = amdgpu_ring_alloc(ring, 4);
+ if (ret) {
+ dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, ret);
+ goto out;
+ }
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
+ amdgpu_ring_write(ring, lower_32_bits(wb_addr));
+ amdgpu_ring_write(ring, upper_32_bits(wb_addr));
+ amdgpu_ring_write(ring, test_pattern);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (le32_to_cpu(adev->wb.wb[index]) == test_pattern)
+ goto out;
+ udelay(1);
+ }
+
+ ret = -ETIMEDOUT;
+out:
+ amdgpu_device_wb_free(adev, index);
+
+ return ret;
+}
+
+static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ const uint32_t test_pattern = 0xdeadbeef;
+ struct amdgpu_ib ib = {};
+ struct dma_fence *f = NULL;
+ uint32_t index;
+ uint64_t wb_addr;
+ int ret;
+
+ ret = amdgpu_device_wb_get(adev, &index);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
+ return ret;
+ }
+
+ adev->wb.wb[index] = 0;
+ wb_addr = adev->wb.gpu_addr + (index * 4);
+
+ ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (ret)
+ goto err0;
+
+ ib.ptr[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
+ ib.ptr[1] = lower_32_bits(wb_addr);
+ ib.ptr[2] = upper_32_bits(wb_addr);
+ ib.ptr[3] = test_pattern;
+ ib.ptr[4] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+ ib.ptr[5] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+ ib.ptr[6] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+ ib.ptr[7] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+ ib.length_dw = 8;
+
+ ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (ret)
+ goto err1;
+
+ ret = dma_fence_wait_timeout(f, false, timeout);
+ if (ret <= 0) {
+ ret = ret ? : -ETIMEDOUT;
+ goto err1;
+ }
+
+ ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;
+
+err1:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+
+ return ret;
+}
+
+static void vpe_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+ /* Power on VPE and notify VPE of new context */
+ if (!vpe->context_started) {
+ uint32_t context_notify;
+
+ /* Power on VPE */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE);
+
+ /* Indicates that a job from a new context has been submitted. */
+ context_notify = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator));
+ if ((context_notify & 0x1) == 0)
+ context_notify |= 0x1;
+ else
+ context_notify &= ~(0x1);
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), context_notify);
+ vpe->context_started = true;
+ }
+}
+
+static void vpe_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
+
+static int vpe_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_GATE);
+ if (r)
+ return r;
+ r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_UNGATE);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static ssize_t amdgpu_get_vpe_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->vpe.supported_reset);
+}
+
+static DEVICE_ATTR(vpe_reset_mask, 0444,
+ amdgpu_get_vpe_reset_mask, NULL);
+
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->vpe.num_instances) {
+ r = device_create_file(adev->dev, &dev_attr_vpe_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->vpe.num_instances)
+ device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
+ }
+}
+
+static const struct amdgpu_ring_funcs vpe_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_VPE,
+ .align_mask = 0xf,
+ .nop = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0),
+ .support_64bit_ptrs = true,
+ .get_rptr = vpe_ring_get_rptr,
+ .get_wptr = vpe_ring_get_wptr,
+ .set_wptr = vpe_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* vpe_ring_init_cond_exec */
+ 6 + /* vpe_ring_emit_pipeline_sync */
+ 10 + 10 + 10 + /* vpe_ring_emit_fence */
+ /* vpe_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6,
+ .emit_ib_size = 7 + 6,
+ .emit_ib = vpe_ring_emit_ib,
+ .emit_pipeline_sync = vpe_ring_emit_pipeline_sync,
+ .emit_fence = vpe_ring_emit_fence,
+ .emit_vm_flush = vpe_ring_emit_vm_flush,
+ .emit_wreg = vpe_ring_emit_wreg,
+ .emit_reg_wait = vpe_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .insert_nop = vpe_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .test_ring = vpe_ring_test_ring,
+ .test_ib = vpe_ring_test_ib,
+ .init_cond_exec = vpe_ring_init_cond_exec,
+ .preempt_ib = vpe_ring_preempt_ib,
+ .begin_use = vpe_ring_begin_use,
+ .end_use = vpe_ring_end_use,
+ .reset = vpe_ring_reset,
+};
+
+static void vpe_set_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vpe.ring.funcs = &vpe_ring_funcs;
+}
+
+const struct amd_ip_funcs vpe_ip_funcs = {
+ .name = "vpe_v6_1",
+ .early_init = vpe_early_init,
+ .sw_init = vpe_sw_init,
+ .sw_fini = vpe_sw_fini,
+ .hw_init = vpe_hw_init,
+ .hw_fini = vpe_hw_fini,
+ .suspend = vpe_suspend,
+ .resume = vpe_resume,
+ .set_clockgating_state = vpe_set_clockgating_state,
+ .set_powergating_state = vpe_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vpe_v6_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VPE,
+ .major = 6,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &vpe_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
new file mode 100644
index 000000000000..695da740a97e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __AMDGPU_VPE_H__
+#define __AMDGPU_VPE_H__
+
+#include "amdgpu_ring.h"
+#include "amdgpu_irq.h"
+#include "vpe_6_1_fw_if.h"
+
+#define AMDGPU_MAX_VPE_INSTANCES 2
+
+struct amdgpu_vpe;
+
+struct vpe_funcs {
+ uint32_t (*get_reg_offset)(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset);
+ int (*set_regs)(struct amdgpu_vpe *vpe);
+ int (*irq_init)(struct amdgpu_vpe *vpe);
+ int (*init_microcode)(struct amdgpu_vpe *vpe);
+ int (*load_microcode)(struct amdgpu_vpe *vpe);
+ int (*ring_init)(struct amdgpu_vpe *vpe);
+ int (*ring_start)(struct amdgpu_vpe *vpe);
+ int (*ring_stop)(struct amdgpu_vpe *vpe);
+ int (*ring_fini)(struct amdgpu_vpe *vpe);
+};
+
+struct vpe_regs {
+ uint32_t queue0_rb_rptr_lo;
+ uint32_t queue0_rb_rptr_hi;
+ uint32_t queue0_rb_wptr_lo;
+ uint32_t queue0_rb_wptr_hi;
+ uint32_t queue0_preempt;
+
+ uint32_t dpm_enable;
+ uint32_t dpm_pratio;
+ uint32_t dpm_request_interval;
+ uint32_t dpm_decision_threshold;
+ uint32_t dpm_busy_clamp_threshold;
+ uint32_t dpm_idle_clamp_threshold;
+ uint32_t dpm_request_lv;
+ uint32_t context_indicator;
+};
+
+struct amdgpu_vpe {
+ struct amdgpu_ring ring;
+ struct amdgpu_irq_src trap_irq;
+
+ const struct vpe_funcs *funcs;
+ struct vpe_regs regs;
+
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_bo *cmdbuf_obj;
+ uint64_t cmdbuf_gpu_addr;
+ uint32_t *cmdbuf_cpu_addr;
+ struct delayed_work idle_work;
+ bool context_started;
+
+ uint32_t num_instances;
+ bool collaborate_mode;
+ uint32_t supported_reset;
+};
+
+int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);
+int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe);
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev);
+
+#define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0)
+#define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0)
+#define vpe_ring_stop(vpe) ((vpe)->funcs->ring_stop ? (vpe)->funcs->ring_stop((vpe)) : 0)
+#define vpe_ring_fini(vpe) ((vpe)->funcs->ring_fini ? (vpe)->funcs->ring_fini((vpe)) : 0)
+
+#define vpe_get_reg_offset(vpe, inst, offset) \
+ ((vpe)->funcs->get_reg_offset ? (vpe)->funcs->get_reg_offset((vpe), (inst), (offset)) : 0)
+#define vpe_set_regs(vpe) \
+ ((vpe)->funcs->set_regs ? (vpe)->funcs->set_regs((vpe)) : 0)
+#define vpe_irq_init(vpe) \
+ ((vpe)->funcs->irq_init ? (vpe)->funcs->irq_init((vpe)) : 0)
+#define vpe_init_microcode(vpe) \
+ ((vpe)->funcs->init_microcode ? (vpe)->funcs->init_microcode((vpe)) : 0)
+#define vpe_load_microcode(vpe) \
+ ((vpe)->funcs->load_microcode ? (vpe)->funcs->load_microcode((vpe)) : 0)
+
+extern const struct amdgpu_ip_block_version vpe_v6_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index c89b66bb70e2..9d934c07fa6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -23,21 +23,73 @@
*/
#include <linux/dma-mapping.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/drm_drv.h>
+
#include "amdgpu.h"
#include "amdgpu_vm.h"
-#include "amdgpu_atomfirmware.h"
+#include "amdgpu_res_cursor.h"
#include "atom.h"
-static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man)
+#define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30)
+
+struct amdgpu_vram_reservation {
+ u64 start;
+ u64 size;
+ struct list_head allocated;
+ struct list_head blocks;
+};
+
+static inline struct amdgpu_vram_mgr *
+to_vram_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_vram_mgr, manager);
}
-static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
+static inline struct amdgpu_device *
+to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
{
return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
}
+static inline struct drm_buddy_block *
+amdgpu_vram_mgr_first_block(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 start, size;
+
+ block = amdgpu_vram_mgr_first_block(head);
+ if (!block)
+ return false;
+
+ while (head != block->link.next) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+
+ block = list_entry(block->link.next, struct drm_buddy_block, link);
+ if (start + size != amdgpu_vram_mgr_block_start(block))
+ return false;
+ }
+
+ return true;
+}
+
+static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 size = 0;
+
+ list_for_each_entry(block, head, link)
+ size += amdgpu_vram_mgr_block_size(block);
+
+ return size;
+}
+
/**
* DOC: mem_info_vram_total
*
@@ -52,7 +104,7 @@ static ssize_t amdgpu_mem_info_vram_total_show(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.real_vram_size);
+ return sysfs_emit(buf, "%llu\n", adev->gmc.real_vram_size);
}
/**
@@ -69,7 +121,7 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.visible_vram_size);
+ return sysfs_emit(buf, "%llu\n", adev->gmc.visible_vram_size);
}
/**
@@ -81,14 +133,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
* amount of currently used VRAM in bytes
*/
static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ struct ttm_resource_manager *man = &adev->mman.vram_mgr.manager;
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- amdgpu_vram_mgr_usage(man));
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
}
/**
@@ -100,46 +152,54 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
* amount of currently used visible VRAM in bytes
*/
static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- amdgpu_vram_mgr_vis_usage(man));
+ return sysfs_emit(buf, "%llu\n",
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr));
}
+/**
+ * DOC: mem_info_vram_vendor
+ *
+ * The amdgpu driver provides a sysfs API for reporting the vendor of the
+ * installed VRAM
+ * The file mem_info_vram_vendor is used for this and returns the name of the
+ * vendor.
+ */
static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
switch (adev->gmc.vram_vendor) {
case SAMSUNG:
- return snprintf(buf, PAGE_SIZE, "samsung\n");
+ return sysfs_emit(buf, "samsung\n");
case INFINEON:
- return snprintf(buf, PAGE_SIZE, "infineon\n");
+ return sysfs_emit(buf, "infineon\n");
case ELPIDA:
- return snprintf(buf, PAGE_SIZE, "elpida\n");
+ return sysfs_emit(buf, "elpida\n");
case ETRON:
- return snprintf(buf, PAGE_SIZE, "etron\n");
+ return sysfs_emit(buf, "etron\n");
case NANYA:
- return snprintf(buf, PAGE_SIZE, "nanya\n");
+ return sysfs_emit(buf, "nanya\n");
case HYNIX:
- return snprintf(buf, PAGE_SIZE, "hynix\n");
+ return sysfs_emit(buf, "hynix\n");
case MOSEL:
- return snprintf(buf, PAGE_SIZE, "mosel\n");
+ return sysfs_emit(buf, "mosel\n");
case WINBOND:
- return snprintf(buf, PAGE_SIZE, "winbond\n");
+ return sysfs_emit(buf, "winbond\n");
case ESMT:
- return snprintf(buf, PAGE_SIZE, "esmt\n");
+ return sysfs_emit(buf, "esmt\n");
case MICRON:
- return snprintf(buf, PAGE_SIZE, "micron\n");
+ return sysfs_emit(buf, "micron\n");
default:
- return snprintf(buf, PAGE_SIZE, "unknown\n");
+ return sysfs_emit(buf, "unknown\n");
}
}
@@ -154,7 +214,7 @@ static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO,
static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO,
amdgpu_mem_info_vram_vendor, NULL);
-static const struct attribute *amdgpu_vram_mgr_attributes[] = {
+static struct attribute *amdgpu_vram_mgr_attributes[] = {
&dev_attr_mem_info_vram_total.attr,
&dev_attr_mem_info_vis_vram_total.attr,
&dev_attr_mem_info_vram_used.attr,
@@ -163,91 +223,41 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = {
NULL
};
-static const struct ttm_resource_manager_func amdgpu_vram_mgr_func;
-
-/**
- * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
- *
- * @adev: amdgpu_device pointer
- *
- * Allocate and initialize the VRAM manager.
- */
-int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
+static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
{
- struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
- int ret;
-
- ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
-
- man->func = &amdgpu_vram_mgr_func;
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
- drm_mm_init(&mgr->mm, 0, man->size);
- spin_lock_init(&mgr->lock);
- INIT_LIST_HEAD(&mgr->reservations_pending);
- INIT_LIST_HEAD(&mgr->reserved_pages);
+ if (attr == &dev_attr_mem_info_vram_vendor.attr &&
+ !adev->gmc.vram_vendor)
+ return 0;
- /* Add the two VRAM-related sysfs files */
- ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
- if (ret)
- DRM_ERROR("Failed to register sysfs\n");
+ if (!ttm_resource_manager_used(&adev->mman.vram_mgr.manager))
+ return 0;
- ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
- ttm_resource_manager_set_used(man, true);
- return 0;
+ return attr->mode;
}
-/**
- * amdgpu_vram_mgr_fini - free and destroy VRAM manager
- *
- * @adev: amdgpu_device pointer
- *
- * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
- * allocated inside it.
- */
-void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
-{
- struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
- int ret;
- struct amdgpu_vram_reservation *rsv, *temp;
-
- ttm_resource_manager_set_used(man, false);
-
- ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
- if (ret)
- return;
-
- spin_lock(&mgr->lock);
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
- kfree(rsv);
-
- list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
- drm_mm_remove_node(&rsv->mm_node);
- kfree(rsv);
- }
- drm_mm_takedown(&mgr->mm);
- spin_unlock(&mgr->lock);
-
- sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
-
- ttm_resource_manager_cleanup(man);
- ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
-}
+const struct attribute_group amdgpu_vram_mgr_attr_group = {
+ .attrs = amdgpu_vram_mgr_attributes,
+ .is_visible = amdgpu_vram_attrs_is_visible
+};
/**
- * amdgpu_vram_mgr_vis_size - Calculate visible node size
+ * amdgpu_vram_mgr_vis_size - Calculate visible block size
*
* @adev: amdgpu_device pointer
- * @node: MM node structure
+ * @block: DRM BUDDY block structure
*
- * Calculate how many bytes of the MM node are inside visible VRAM
+ * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
*/
static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
- struct drm_mm_node *node)
+ struct drm_buddy_block *block)
{
- uint64_t start = node->start << PAGE_SHIFT;
- uint64_t end = (node->size + node->start) << PAGE_SHIFT;
+ u64 start = amdgpu_vram_mgr_block_start(block);
+ u64 end = start + amdgpu_vram_mgr_block_size(block);
if (start >= adev->gmc.visible_vram_size)
return 0;
@@ -267,72 +277,83 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct ttm_resource *mem = &bo->tbo.mem;
- struct drm_mm_node *nodes = mem->mm_node;
- unsigned pages = mem->num_pages;
- u64 usage;
+ struct ttm_resource *res = bo->tbo.resource;
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+ u64 usage = 0;
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
return amdgpu_bo_size(bo);
- if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
+ if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
return 0;
- for (usage = 0; nodes && pages; pages -= nodes->size, nodes++)
- usage += amdgpu_vram_mgr_vis_size(adev, nodes);
+ list_for_each_entry(block, &vres->blocks, link)
+ usage += amdgpu_vram_mgr_vis_size(adev, block);
return usage;
}
+/* Commit the reservation of VRAM pages */
static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_mm *mm = &mgr->mm;
+ struct drm_buddy *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv, *temp;
+ struct drm_buddy_block *block;
uint64_t vis_usage;
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
- if (drm_mm_reserve_node(mm, &rsv->mm_node))
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
+ if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+ rsv->size, mm->chunk_size, &rsv->allocated,
+ DRM_BUDDY_RANGE_ALLOCATION))
+ continue;
+
+ block = amdgpu_vram_mgr_first_block(&rsv->allocated);
+ if (!block)
continue;
dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
- rsv->mm_node.start, rsv->mm_node.size);
+ rsv->start, rsv->size);
- vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
+ vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
atomic64_add(vis_usage, &mgr->vis_usage);
- atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
- list_move(&rsv->node, &mgr->reserved_pages);
+ spin_lock(&man->bdev->lru_lock);
+ man->usage += rsv->size;
+ spin_unlock(&man->bdev->lru_lock);
+ list_move(&rsv->blocks, &mgr->reserved_pages);
}
}
/**
* amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_vram_mgr pointer
* @start: start address of the range in VRAM
* @size: size of the range
*
- * Reserve memory from start addess with the specified size in VRAM
+ * Reserve memory from start address with the specified size in VRAM
*/
-int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_vram_reservation *rsv;
rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
if (!rsv)
return -ENOMEM;
- INIT_LIST_HEAD(&rsv->node);
- rsv->mm_node.start = start >> PAGE_SHIFT;
- rsv->mm_node.size = size >> PAGE_SHIFT;
+ INIT_LIST_HEAD(&rsv->allocated);
+ INIT_LIST_HEAD(&rsv->blocks);
- spin_lock(&mgr->lock);
- list_add_tail(&mgr->reservations_pending, &rsv->node);
- amdgpu_vram_mgr_do_reserve(man);
- spin_unlock(&mgr->lock);
+ rsv->start = start;
+ rsv->size = size;
+
+ mutex_lock(&mgr->lock);
+ list_add_tail(&rsv->blocks, &mgr->reservations_pending);
+ amdgpu_vram_mgr_do_reserve(&mgr->manager);
+ mutex_unlock(&mgr->lock);
return 0;
}
@@ -340,7 +361,7 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
/**
* amdgpu_vram_mgr_query_page_status - query the reservation status
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_vram_mgr pointer
* @start: start address of a page in VRAM
*
* Returns:
@@ -348,26 +369,25 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
* 0: the page has been reserved
* -ENOENT: the input page is not a reservation
*/
-int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_vram_reservation *rsv;
int ret;
- spin_lock(&mgr->lock);
+ mutex_lock(&mgr->lock);
- list_for_each_entry(rsv, &mgr->reservations_pending, node) {
- if ((rsv->mm_node.start <= start) &&
- (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+ list_for_each_entry(rsv, &mgr->reservations_pending, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
ret = -EBUSY;
goto out;
}
}
- list_for_each_entry(rsv, &mgr->reserved_pages, node) {
- if ((rsv->mm_node.start <= start) &&
- (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
ret = 0;
goto out;
}
@@ -375,30 +395,37 @@ int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
ret = -ENOENT;
out:
- spin_unlock(&mgr->lock);
+ mutex_unlock(&mgr->lock);
return ret;
}
-/**
- * amdgpu_vram_mgr_virt_start - update virtual start address
- *
- * @mem: ttm_resource to update
- * @node: just allocated node
- *
- * Calculate a virtual BO start address to easily check if everything is CPU
- * accessible.
- */
-static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
- struct drm_mm_node *node)
+int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
+ uint64_t address, struct amdgpu_vram_block_info *info)
{
- unsigned long start;
+ struct amdgpu_vram_mgr_resource *vres;
+ struct drm_buddy_block *block;
+ u64 start, size;
+ int ret = -ENOENT;
+
+ mutex_lock(&mgr->lock);
+ list_for_each_entry(vres, &mgr->allocated_vres_list, vres_node) {
+ list_for_each_entry(block, &vres->blocks, link) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+ if ((start <= address) && (address < (start + size))) {
+ info->start = start;
+ info->size = size;
+ memcpy(&info->task, &vres->task, sizeof(vres->task));
+ ret = 0;
+ goto out;
+ }
+ }
+ }
- start = node->start + node->size;
- if (start > mem->num_pages)
- start -= mem->num_pages;
- else
- start = 0;
- mem->start = max(mem->start, start);
+out:
+ mutex_unlock(&mgr->lock);
+
+ return ret;
}
/**
@@ -407,120 +434,192 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
* @man: TTM memory type manager
* @tbo: TTM BO we need this range for
* @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @res: the resulting mem object
*
* Allocate VRAM for the given BO.
*/
static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
struct ttm_buffer_object *tbo,
const struct ttm_place *place,
- struct ttm_resource *mem)
+ struct ttm_resource **res)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_mm *mm = &mgr->mm;
- struct drm_mm_node *nodes;
- enum drm_mm_insert_mode mode;
- unsigned long lpfn, num_nodes, pages_per_node, pages_left;
- uint64_t vis_usage = 0, mem_bytes, max_bytes;
- unsigned i;
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ u64 vis_usage = 0, max_bytes, min_block_size;
+ struct amdgpu_vram_mgr_resource *vres;
+ u64 size, remaining_size, lpfn, fpfn;
+ unsigned int adjust_dcc_size = 0;
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ unsigned long pages_per_block;
int r;
- lpfn = place->lpfn;
- if (!lpfn)
+ lpfn = (u64)place->lpfn << PAGE_SHIFT;
+ if (!lpfn || lpfn > man->size)
lpfn = man->size;
+ fpfn = (u64)place->fpfn << PAGE_SHIFT;
+
max_bytes = adev->gmc.mc_vram_size;
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
- /* bail out quickly if there's likely not enough VRAM for this BO */
- mem_bytes = (u64)mem->num_pages << PAGE_SHIFT;
- if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) {
- atomic64_sub(mem_bytes, &mgr->usage);
- return -ENOSPC;
- }
-
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- pages_per_node = ~0ul;
- num_nodes = 1;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ pages_per_block = ~0ul;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- pages_per_node = HPAGE_PMD_NR;
+ pages_per_block = HPAGE_PMD_NR;
#else
/* default to 2MB */
- pages_per_node = (2UL << (20UL - PAGE_SHIFT));
+ pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment);
- num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
+ pages_per_block = max_t(u32, pages_per_block,
+ tbo->page_alignment);
}
- nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
- GFP_KERNEL | __GFP_ZERO);
- if (!nodes) {
- atomic64_sub(mem_bytes, &mgr->usage);
+ vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+ if (!vres)
return -ENOMEM;
+
+ ttm_resource_init(tbo, place, &vres->base);
+
+ /* bail out quickly if there's likely not enough VRAM for this BO */
+ if (ttm_resource_manager_usage(man) > max_bytes) {
+ r = -ENOSPC;
+ goto error_fini;
}
- mode = DRM_MM_INSERT_BEST;
+ INIT_LIST_HEAD(&vres->blocks);
+
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- mode = DRM_MM_INSERT_HIGH;
+ vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
- mem->start = 0;
- pages_left = mem->num_pages;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
- spin_lock(&mgr->lock);
- for (i = 0; pages_left >= pages_per_node; ++i) {
- unsigned long pages = rounddown_pow_of_two(pages_left);
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
- /* Limit maximum size to 2GB due to SG table limitations */
- pages = min(pages, (2UL << (30 - PAGE_SHIFT)));
+ if (fpfn || lpfn != mgr->mm.size)
+ /* Allocate blocks in desired range */
+ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
- r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
- pages_per_node, 0,
- place->fpfn, lpfn,
- mode);
- if (unlikely(r))
- break;
+ if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC &&
+ adev->gmc.gmc_funcs->get_dcc_alignment)
+ adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev);
+
+ remaining_size = (u64)vres->base.size;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ unsigned int dcc_size;
+
+ dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
+ remaining_size = (u64)dcc_size;
- vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
- amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
- pages_left -= pages;
+ vres->flags |= DRM_BUDDY_TRIM_DISABLE;
}
- for (; pages_left; ++i) {
- unsigned long pages = min(pages_left, pages_per_node);
- uint32_t alignment = mem->page_alignment;
+ mutex_lock(&mgr->lock);
+ while (remaining_size) {
+ if (tbo->page_alignment)
+ min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT;
+ else
+ min_block_size = mgr->default_page_size;
- if (pages == pages_per_node)
- alignment = pages_per_node;
+ size = remaining_size;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size)
+ min_block_size = size;
+ else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+ min_block_size = (u64)pages_per_block << PAGE_SHIFT;
+
+ BUG_ON(min_block_size < mm->chunk_size);
+
+ r = drm_buddy_alloc_blocks(mm, fpfn,
+ lpfn,
+ size,
+ min_block_size,
+ &vres->blocks,
+ vres->flags);
+
+ if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
+ vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
+ tbo->page_alignment);
+
+ continue;
+ }
- r = drm_mm_insert_node_in_range(mm, &nodes[i],
- pages, alignment, 0,
- place->fpfn, lpfn,
- mode);
if (unlikely(r))
- goto error;
+ goto error_free_blocks;
- vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
- amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
- pages_left -= pages;
+ if (size > remaining_size)
+ remaining_size = 0;
+ else
+ remaining_size -= size;
}
- spin_unlock(&mgr->lock);
- atomic64_add(vis_usage, &mgr->vis_usage);
+ vres->task.pid = task_pid_nr(current);
+ get_task_comm(vres->task.comm, current);
+ list_add_tail(&vres->vres_node, &mgr->allocated_vres_list);
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ struct drm_buddy_block *dcc_block;
+ unsigned long dcc_start;
+ u64 trim_start;
+
+ dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);
+ /* Adjust the start address for DCC buffers only */
+ dcc_start =
+ roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
+ adjust_dcc_size);
+ trim_start = (u64)dcc_start;
+ drm_buddy_block_trim(mm, &trim_start,
+ (u64)vres->base.size,
+ &vres->blocks);
+ }
+ mutex_unlock(&mgr->lock);
+
+ vres->base.start = 0;
+ size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
+ vres->base.size);
+ list_for_each_entry(block, &vres->blocks, link) {
+ unsigned long start;
+
+ start = amdgpu_vram_mgr_block_start(block) +
+ amdgpu_vram_mgr_block_size(block);
+ start >>= PAGE_SHIFT;
+
+ if (start > PFN_UP(size))
+ start -= PFN_UP(size);
+ else
+ start = 0;
+ vres->base.start = max(vres->base.start, start);
+
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+ }
- mem->mm_node = nodes;
+ if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
+ vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+ if (adev->gmc.xgmi.connected_to_cpu)
+ vres->base.bus.caching = ttm_cached;
+ else
+ vres->base.bus.caching = ttm_write_combined;
+
+ atomic64_add(vis_usage, &mgr->vis_usage);
+ *res = &vres->base;
return 0;
-error:
- while (i--)
- drm_mm_remove_node(&nodes[i]);
- spin_unlock(&mgr->lock);
- atomic64_sub(mem->num_pages << PAGE_SHIFT, &mgr->usage);
+error_free_blocks:
+ drm_buddy_free_list(mm, &vres->blocks, 0);
+ mutex_unlock(&mgr->lock);
+error_fini:
+ ttm_resource_fini(man, &vres->base);
+ kfree(vres);
- kvfree(nodes);
return r;
}
@@ -528,45 +627,45 @@ error:
* amdgpu_vram_mgr_del - free ranges
*
* @man: TTM memory type manager
- * @mem: TTM memory object
+ * @res: TTM memory object
*
* Free the allocated VRAM again.
*/
static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
- struct ttm_resource *mem)
+ struct ttm_resource *res)
{
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_mm_node *nodes = mem->mm_node;
- uint64_t usage = 0, vis_usage = 0;
- unsigned pages = mem->num_pages;
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ uint64_t vis_usage = 0;
- if (!mem->mm_node)
- return;
+ mutex_lock(&mgr->lock);
- spin_lock(&mgr->lock);
- while (pages) {
- pages -= nodes->size;
- drm_mm_remove_node(nodes);
- usage += nodes->size << PAGE_SHIFT;
- vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
- ++nodes;
- }
+ list_del(&vres->vres_node);
+ memset(&vres->task, 0, sizeof(vres->task));
+
+ list_for_each_entry(block, &vres->blocks, link)
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+ drm_buddy_free_list(mm, &vres->blocks, vres->flags);
amdgpu_vram_mgr_do_reserve(man);
- spin_unlock(&mgr->lock);
+ mutex_unlock(&mgr->lock);
- atomic64_sub(usage, &mgr->usage);
atomic64_sub(vis_usage, &mgr->vis_usage);
- kvfree(mem->mm_node);
- mem->mm_node = NULL;
+ ttm_resource_fini(man, res);
+ kfree(vres);
}
/**
* amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
*
* @adev: amdgpu device pointer
- * @mem: TTM memory object
+ * @res: TTM memory object
+ * @offset: byte offset from the base of VRAM BO
+ * @length: number of bytes to export in sg_table
* @dev: the other device
* @dir: dma direction
* @sgt: resulting sg table
@@ -574,40 +673,48 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
* Allocate and fill a sg table from a VRAM allocation.
*/
int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
- struct ttm_resource *mem,
+ struct ttm_resource *res,
+ u64 offset, u64 length,
struct device *dev,
enum dma_data_direction dir,
struct sg_table **sgt)
{
- struct drm_mm_node *node;
+ struct amdgpu_res_cursor cursor;
struct scatterlist *sg;
int num_entries = 0;
- unsigned int pages;
int i, r;
*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
if (!*sgt)
return -ENOMEM;
- for (pages = mem->num_pages, node = mem->mm_node;
- pages; pages -= node->size, ++node)
- ++num_entries;
+ /* Determine the number of DRM_BUDDY blocks to export */
+ amdgpu_res_first(res, offset, length, &cursor);
+ while (cursor.remaining) {
+ num_entries++;
+ amdgpu_res_next(&cursor, min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE));
+ }
r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
if (r)
goto error_free;
+ /* Initialize scatterlist nodes of sg_table */
for_each_sgtable_sg((*sgt), sg, i)
sg->length = 0;
- node = mem->mm_node;
+ /*
+ * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+ * @note: Use iterator api to get first the DRM_BUDDY block
+ * and the number of bytes from it. Access the following
+ * DRM_BUDDY block(s) if more buffer needs to exported
+ */
+ amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
- phys_addr_t phys = (node->start << PAGE_SHIFT) +
- adev->gmc.aper_base;
- size_t size = node->size << PAGE_SHIFT;
+ phys_addr_t phys = cursor.start + adev->gmc.aper_base;
+ unsigned long size = min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE);
dma_addr_t addr;
- ++node;
addr = dma_map_resource(dev, phys, size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
r = dma_mapping_error(dev, addr);
@@ -617,7 +724,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg_set_page(sg, NULL, size, 0);
sg_dma_address(sg) = addr;
sg_dma_len(sg) = size;
+
+ amdgpu_res_next(&cursor, size);
}
+
return 0;
error_unmap:
@@ -639,15 +749,13 @@ error_free:
/**
* amdgpu_vram_mgr_free_sgt - allocate and fill a sg table
*
- * @adev: amdgpu device pointer
* @dev: device pointer
* @dir: data direction of resource to unmap
* @sgt: sg table to free
*
* Free a previously allocate sg table.
*/
-void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
- struct device *dev,
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
enum dma_data_direction dir,
struct sg_table *sgt)
{
@@ -663,31 +771,98 @@ void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
}
/**
- * amdgpu_vram_mgr_usage - how many bytes are used in this domain
+ * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ *
+ * Returns how many bytes are used in the visible part of VRAM
+ */
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
+{
+ return atomic64_read(&mgr->vis_usage);
+}
+
+/**
+ * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Reset the cleared drm buddy blocks.
+ */
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct drm_buddy *mm = &mgr->mm;
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_reset_clear(mm, false);
+ mutex_unlock(&mgr->lock);
+}
+
+/**
+ * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection
*
* @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
*
- * Returns how many bytes are used in this domain.
+ * Test each drm buddy block for intersection for eviction decision.
*/
-uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man)
+static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+
+ /* Check each drm buddy block individually */
+ list_for_each_entry(block, &mgr->blocks, link) {
+ unsigned long fpfn =
+ amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+ if (place->fpfn < lpfn &&
+ (!place->lpfn || place->lpfn > fpfn))
+ return true;
+ }
- return atomic64_read(&mgr->usage);
+ return false;
}
/**
- * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
+ * amdgpu_vram_mgr_compatible - test each drm buddy block for compatibility
*
* @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
*
- * Returns how many bytes are used in the visible part of VRAM
+ * Test each drm buddy block for placement compatibility.
*/
-uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man)
+static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+
+ /* Check each drm buddy block individually */
+ list_for_each_entry(block, &mgr->blocks, link) {
+ unsigned long fpfn =
+ amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+ if (fpfn < place->fpfn ||
+ (place->lpfn && lpfn > place->lpfn))
+ return false;
+ }
- return atomic64_read(&mgr->vis_usage);
+ return true;
}
/**
@@ -702,18 +877,101 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct drm_buddy *mm = &mgr->mm;
+ struct amdgpu_vram_reservation *rsv;
+
+ drm_printf(printer, " vis usage:%llu\n",
+ amdgpu_vram_mgr_vis_usage(mgr));
- spin_lock(&mgr->lock);
- drm_mm_print(&mgr->mm, printer);
- spin_unlock(&mgr->lock);
+ mutex_lock(&mgr->lock);
+ drm_printf(printer, "default_page_size: %lluKiB\n",
+ mgr->default_page_size >> 10);
- drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
- man->size, amdgpu_vram_mgr_usage(man) >> 20,
- amdgpu_vram_mgr_vis_usage(man) >> 20);
+ drm_buddy_print(mm, printer);
+
+ drm_printf(printer, "reserved:\n");
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks)
+ drm_printf(printer, "%#018llx-%#018llx: %llu\n",
+ rsv->start, rsv->start + rsv->size, rsv->size);
+ mutex_unlock(&mgr->lock);
}
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.alloc = amdgpu_vram_mgr_new,
.free = amdgpu_vram_mgr_del,
+ .intersects = amdgpu_vram_mgr_intersects,
+ .compatible = amdgpu_vram_mgr_compatible,
.debug = amdgpu_vram_mgr_debug
};
+
+/**
+ * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the VRAM manager.
+ */
+int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int err;
+
+ man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size);
+ if (IS_ERR(man->cg))
+ return PTR_ERR(man->cg);
+ ttm_resource_manager_init(man, &adev->mman.bdev,
+ adev->gmc.real_vram_size);
+
+ mutex_init(&mgr->lock);
+ INIT_LIST_HEAD(&mgr->reservations_pending);
+ INIT_LIST_HEAD(&mgr->reserved_pages);
+ INIT_LIST_HEAD(&mgr->allocated_vres_list);
+ mgr->default_page_size = PAGE_SIZE;
+
+ man->func = &amdgpu_vram_mgr_func;
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
+
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_fini - free and destroy VRAM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+ struct amdgpu_vram_reservation *rsv, *temp;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ mutex_lock(&mgr->lock);
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks)
+ kfree(rsv);
+
+ list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
+ kfree(rsv);
+ }
+ if (!adev->gmc.is_app_apu)
+ drm_buddy_fini(&mgr->mm);
+ mutex_unlock(&mgr->lock);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
new file mode 100644
index 000000000000..5f5fd9a911c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VRAM_MGR_H__
+#define __AMDGPU_VRAM_MGR_H__
+
+#include <drm/drm_buddy.h>
+
+struct amdgpu_vram_mgr {
+ struct ttm_resource_manager manager;
+ struct drm_buddy mm;
+ /* protects access to buffer objects */
+ struct mutex lock;
+ struct list_head reservations_pending;
+ struct list_head reserved_pages;
+ atomic64_t vis_usage;
+ u64 default_page_size;
+ struct list_head allocated_vres_list;
+};
+
+struct amdgpu_vres_task {
+ pid_t pid;
+ char comm[TASK_COMM_LEN];
+};
+
+struct amdgpu_vram_block_info {
+ u64 start;
+ u64 size;
+ struct amdgpu_vres_task task;
+};
+
+struct amdgpu_vram_mgr_resource {
+ struct ttm_resource base;
+ struct list_head blocks;
+ unsigned long flags;
+ struct list_head vres_node;
+ struct amdgpu_vres_task task;
+};
+
+static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_offset(block);
+}
+
+static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
+{
+ return (u64)PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_is_clear(block);
+}
+
+static inline struct amdgpu_vram_mgr_resource *
+to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
+{
+ return container_of(res, struct amdgpu_vram_mgr_resource, base);
+}
+
+static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res)
+{
+ struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res);
+
+ WARN_ON(ares->flags & DRM_BUDDY_CLEARED);
+ ares->flags |= DRM_BUDDY_CLEARED;
+}
+
+int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
+ uint64_t address, struct amdgpu_vram_block_info *info);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
new file mode 100644
index 000000000000..1083db8cea2e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -0,0 +1,1107 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_drv.h"
+
+#include <drm/drm_drv.h>
+#include "../amdxcp/amdgpu_xcp_drv.h"
+
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr);
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr);
+
+static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
+{
+ int (*run_func)(void *handle, uint32_t inst_mask);
+ int ret = 0;
+
+ if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs)
+ return 0;
+
+ run_func = NULL;
+
+ switch (xcp_state) {
+ case AMDGPU_XCP_PREPARE_SUSPEND:
+ run_func = xcp_ip->ip_funcs->prepare_suspend;
+ break;
+ case AMDGPU_XCP_SUSPEND:
+ run_func = xcp_ip->ip_funcs->suspend;
+ break;
+ case AMDGPU_XCP_PREPARE_RESUME:
+ run_func = xcp_ip->ip_funcs->prepare_resume;
+ break;
+ case AMDGPU_XCP_RESUME:
+ run_func = xcp_ip->ip_funcs->resume;
+ break;
+ }
+
+ if (run_func)
+ ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask);
+
+ return ret;
+}
+
+static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ int state)
+{
+ struct amdgpu_xcp_ip *xcp_ip;
+ struct amdgpu_xcp *xcp;
+ int i, ret;
+
+ if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid)
+ return -EINVAL;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) {
+ xcp_ip = &xcp->ip[i];
+ ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_SUSPEND);
+}
+
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND);
+}
+
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_RESUME);
+}
+
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME);
+}
+
+static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_xcp *xcp;
+
+ if (!ip)
+ return;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ xcp->ip[ip->ip_id] = *ip;
+ xcp->ip[ip->ip_id].valid = true;
+
+ xcp->valid = true;
+}
+
+static void __amdgpu_xcp_set_unique_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ int xcp_id)
+{
+ struct amdgpu_xcp *xcp = &xcp_mgr->xcp[xcp_id];
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ uint32_t inst_mask;
+ uint64_t uid;
+ int i;
+
+ if (!amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask) &&
+ inst_mask) {
+ i = GET_INST(GC, (ffs(inst_mask) - 1));
+ uid = amdgpu_device_get_uid(xcp_mgr->adev->uid_info,
+ AMDGPU_UID_TYPE_XCD, i);
+ if (uid)
+ xcp->unique_id = uid;
+ }
+}
+
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ struct amdgpu_xcp_ip ip;
+ uint8_t mem_id;
+ int i, j, ret;
+
+ if (!num_xcps || num_xcps > MAX_XCP)
+ return -EINVAL;
+
+ xcp_mgr->mode = mode;
+
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].valid = false;
+
+ /* This is needed for figuring out memory id of xcp */
+ xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < num_xcps; ++i) {
+ for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) {
+ ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j,
+ &ip);
+ if (ret)
+ continue;
+
+ __amdgpu_xcp_add_block(xcp_mgr, i, &ip);
+ }
+
+ xcp_mgr->xcp[i].id = i;
+
+ if (xcp_mgr->funcs->get_xcp_mem_id) {
+ ret = xcp_mgr->funcs->get_xcp_mem_id(
+ xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
+ if (ret)
+ continue;
+ else
+ xcp_mgr->xcp[i].mem_id = mem_id;
+ }
+ __amdgpu_xcp_set_unique_id(xcp_mgr, i);
+ }
+
+ xcp_mgr->num_xcps = num_xcps;
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ return 0;
+}
+
+static int __amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode)
+{
+ int ret, curr_mode, num_xcps = 0;
+
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
+ return 0;
+
+ mutex_lock(&xcp_mgr->xcp_lock);
+
+ curr_mode = xcp_mgr->mode;
+ /* State set to transient mode */
+ xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;
+
+ ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);
+
+ if (ret) {
+ /* Failed, get whatever mode it's at now */
+ if (xcp_mgr->funcs->query_partition_mode)
+ xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
+ xcp_mgr, AMDGPU_XCP_FL_LOCKED);
+ else
+ xcp_mgr->mode = curr_mode;
+
+ goto out;
+ }
+ amdgpu_xcp_sysfs_entries_update(xcp_mgr);
+out:
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return ret;
+}
+
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
+ return -EINVAL;
+
+ if (xcp_mgr->mode == mode)
+ return 0;
+
+ return __amdgpu_xcp_switch_partition_mode(xcp_mgr, mode);
+}
+
+int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr || xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode);
+}
+
+static bool __amdgpu_xcp_is_cached_mode_valid(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
+ return true;
+
+ if (!amdgpu_sriov_vf(xcp_mgr->adev) &&
+ xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return true;
+
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_NONE &&
+ xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS)
+ return true;
+
+ return false;
+}
+
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int mode;
+
+ if (__amdgpu_xcp_is_cached_mode_valid(xcp_mgr))
+ return xcp_mgr->mode;
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_lock(&xcp_mgr->xcp_lock);
+ mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+
+ /* First time query for VF, set the mode here */
+ if (amdgpu_sriov_vf(xcp_mgr->adev) &&
+ xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ xcp_mgr->mode = mode;
+
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
+ dev_WARN(
+ xcp_mgr->adev->dev,
+ "Cached partition mode %d not matching with device mode %d",
+ xcp_mgr->mode, mode);
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return mode;
+}
+
+static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ struct drm_device *ddev;
+ int i, ret;
+
+ ddev = adev_to_drm(adev);
+
+ /* xcp #0 shares drm device setting with adev */
+ adev->xcp_mgr->xcp->ddev = ddev;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
+ if (ret == -ENOSPC) {
+ dev_warn(adev->dev,
+ "Skip xcp node #%d when out of drm node resource.", i);
+ ret = 0;
+ goto out;
+ } else if (ret) {
+ goto out;
+ }
+
+ /* Redirect all IOCTLs to the primary device */
+ adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
+ adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev;
+ adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver;
+ adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager;
+ p_ddev->render->dev = ddev;
+ p_ddev->primary->dev = ddev;
+ p_ddev->vma_offset_manager = ddev->vma_offset_manager;
+ p_ddev->driver = &amdgpu_partition_driver;
+ adev->xcp_mgr->xcp[i].ddev = p_ddev;
+
+ dev_set_drvdata(p_ddev->dev, &adev->xcp_mgr->xcp[i]);
+ }
+ ret = 0;
+out:
+ amdgpu_xcp_sysfs_entries_init(adev->xcp_mgr);
+
+ return ret;
+}
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_num_xcps,
+ struct amdgpu_xcp_mgr_funcs *xcp_funcs)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ int i;
+
+ if (!xcp_funcs || !xcp_funcs->get_ip_details)
+ return -EINVAL;
+
+ xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL);
+
+ if (!xcp_mgr)
+ return -ENOMEM;
+
+ xcp_mgr->adev = adev;
+ xcp_mgr->funcs = xcp_funcs;
+ xcp_mgr->mode = init_mode;
+ mutex_init(&xcp_mgr->xcp_lock);
+
+ if (init_mode != AMDGPU_XCP_MODE_NONE)
+ amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
+
+ adev->xcp_mgr = xcp_mgr;
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].xcp_mgr = xcp_mgr;
+
+ return amdgpu_xcp_dev_alloc(adev);
+}
+
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance)
+{
+ struct amdgpu_xcp *xcp;
+ int i, id_mask = 0;
+
+ if (ip >= AMDGPU_XCP_MAX_BLOCKS)
+ return -EINVAL;
+
+ for (i = 0; i < xcp_mgr->num_xcps; ++i) {
+ xcp = &xcp_mgr->xcp[i];
+ if ((xcp->valid) && (xcp->ip[ip].valid) &&
+ (xcp->ip[ip].inst_mask & BIT(instance)))
+ id_mask |= BIT(i);
+ }
+
+ if (!id_mask)
+ id_mask = -ENXIO;
+
+ return id_mask;
+}
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask)
+{
+ if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid))
+ return -EINVAL;
+
+ *inst_mask = xcp->ip[ip].inst_mask;
+
+ return 0;
+}
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent)
+{
+ int i, ret;
+
+ if (!adev->xcp_mgr)
+ return 0;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ int i;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ p_ddev = adev->xcp_mgr->xcp[i].ddev;
+ drm_dev_unplug(p_ddev);
+ p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
+ p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
+ p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
+ p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
+ amdgpu_xcp_drm_dev_free(p_ddev);
+ }
+}
+
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv)
+{
+ int i;
+
+ if (!adev->xcp_mgr)
+ return 0;
+
+ fpriv->xcp_id = AMDGPU_XCP_NO_PARTITION;
+ for (i = 0; i < MAX_XCP; ++i) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ if (file_priv->minor == adev->xcp_mgr->xcp[i].ddev->render) {
+ if (adev->xcp_mgr->xcp[i].valid == FALSE) {
+ dev_err(adev->dev, "renderD%d partition %d not valid!",
+ file_priv->minor->index, i);
+ return -ENOENT;
+ }
+ dev_dbg(adev->dev, "renderD%d partition %d opened!",
+ file_priv->minor->index, i);
+ fpriv->xcp_id = i;
+ break;
+ }
+ }
+
+ fpriv->vm.mem_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ? -1 :
+ adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
+ return 0;
+}
+
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
+{
+ struct drm_gpu_scheduler *sched;
+ struct amdgpu_ring *ring;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ sched = entity->entity.rq->sched;
+ if (drm_sched_wqueue_ready(sched)) {
+ ring = to_amdgpu_ring(entity->entity.rq->sched);
+ atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
+ }
+}
+
+int amdgpu_xcp_select_scheds(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds)
+{
+ u32 sel_xcp_id;
+ int i;
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+
+ if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) {
+ u32 least_ref_cnt = ~0;
+
+ fpriv->xcp_id = 0;
+ for (i = 0; i < xcp_mgr->num_xcps; i++) {
+ u32 total_ref_cnt;
+
+ total_ref_cnt = atomic_read(&xcp_mgr->xcp[i].ref_cnt);
+ if (total_ref_cnt < least_ref_cnt) {
+ fpriv->xcp_id = i;
+ least_ref_cnt = total_ref_cnt;
+ }
+ }
+ }
+ sel_xcp_id = fpriv->xcp_id;
+
+ if (xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) {
+ *num_scheds =
+ xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds;
+ *scheds =
+ xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched;
+ atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt);
+ dev_dbg(adev->dev, "Selected partition #%d", sel_xcp_id);
+ } else {
+ dev_err(adev->dev, "Failed to schedule partition #%d.", sel_xcp_id);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static void amdgpu_set_xcp_id(struct amdgpu_device *adev,
+ uint32_t inst_idx,
+ struct amdgpu_ring *ring)
+{
+ int xcp_id;
+ enum AMDGPU_XCP_IP_BLOCK ip_blk;
+ uint32_t inst_mask;
+
+ ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id;
+ if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) ||
+ (ring->funcs->type == AMDGPU_RING_TYPE_CPER))
+ return;
+
+ inst_mask = 1 << inst_idx;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ ip_blk = AMDGPU_XCP_GFX;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ ip_blk = AMDGPU_XCP_SDMA;
+ break;
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ ip_blk = AMDGPU_XCP_VCN;
+ break;
+ default:
+ dev_err(adev->dev, "Not support ring type %d!", ring->funcs->type);
+ return;
+ }
+
+ for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
+ if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
+ ring->xcp_id = xcp_id;
+ dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name,
+ ring->xcp_id);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id;
+ break;
+ }
+ }
+}
+
+static void amdgpu_xcp_gpu_sched_update(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int sel_xcp_id)
+{
+ unsigned int *num_gpu_sched;
+
+ num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id]
+ .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds;
+ adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio]
+ .sched[(*num_gpu_sched)++] = &ring->sched;
+ dev_dbg(adev->dev, "%s :[%d] gpu_sched[%d][%d] = %d",
+ ring->name, sel_xcp_id, ring->funcs->type,
+ ring->hw_prio, *num_gpu_sched);
+}
+
+static int amdgpu_xcp_sched_list_update(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0);
+ memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched));
+ }
+
+ if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ ring = adev->rings[i];
+ if (!ring || !ring->sched.ready || ring->no_scheduler)
+ continue;
+
+ amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
+
+ /* VCN may be shared by two partitions under CPX MODE in certain
+ * configs.
+ */
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst))
+ amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
+ }
+
+ return 0;
+}
+
+int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->num_rings; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ amdgpu_set_xcp_id(adev, ring->xcc_id, ring);
+ else
+ amdgpu_set_xcp_id(adev, ring->me, ring);
+ }
+
+ return amdgpu_xcp_sched_list_update(adev);
+}
+
+void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ xcp_mgr->supp_xcp_modes = 0;
+
+ switch (NUM_XCC(adev->gfx.xcc_mask)) {
+ case 8:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_DPX_PARTITION_MODE) |
+ BIT(AMDGPU_QPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 6:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_TPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 4:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_DPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 2:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 1:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+
+ default:
+ break;
+ }
+}
+
+int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ /* TODO:
+ * Stop user queues and threads, and make sure GPU is empty of work.
+ */
+
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);
+
+ return 0;
+}
+
+int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int ret = 0;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ amdgpu_amdkfd_device_probe(xcp_mgr->adev);
+ amdgpu_amdkfd_device_init(xcp_mgr->adev);
+ /* If KFD init failed, return failure */
+ if (!xcp_mgr->adev->kfd.init_complete)
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+/*====================== xcp sysfs - configuration ======================*/
+#define XCP_CFG_SYSFS_RES_ATTR_SHOW(_name) \
+ static ssize_t amdgpu_xcp_res_sysfs_##_name##_show( \
+ struct amdgpu_xcp_res_details *xcp_res, char *buf) \
+ { \
+ return sysfs_emit(buf, "%d\n", xcp_res->_name); \
+ }
+
+struct amdgpu_xcp_res_sysfs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct amdgpu_xcp_res_details *xcp_res, char *buf);
+};
+
+#define XCP_CFG_SYSFS_RES_ATTR(_name) \
+ struct amdgpu_xcp_res_sysfs_attribute xcp_res_sysfs_attr_##_name = { \
+ .attr = { .name = __stringify(_name), .mode = 0400 }, \
+ .show = amdgpu_xcp_res_sysfs_##_name##_show, \
+ }
+
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_inst)
+XCP_CFG_SYSFS_RES_ATTR(num_inst);
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_shared)
+XCP_CFG_SYSFS_RES_ATTR(num_shared);
+
+#define XCP_CFG_SYSFS_RES_ATTR_PTR(_name) xcp_res_sysfs_attr_##_name.attr
+
+static struct attribute *xcp_cfg_res_sysfs_attrs[] = {
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_inst),
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_shared), NULL
+};
+
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
+static const char *nps_desc[] = {
+ [UNKNOWN_MEMORY_PARTITION_MODE] = "UNKNOWN",
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+ATTRIBUTE_GROUPS(xcp_cfg_res_sysfs);
+
+#define to_xcp_attr(x) \
+ container_of(x, struct amdgpu_xcp_res_sysfs_attribute, attr)
+#define to_xcp_res(x) container_of(x, struct amdgpu_xcp_res_details, kobj)
+
+static ssize_t xcp_cfg_res_sysfs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_res_sysfs_attribute *attribute;
+ struct amdgpu_xcp_res_details *xcp_res;
+
+ attribute = to_xcp_attr(attr);
+ xcp_res = to_xcp_res(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(xcp_res, buf);
+}
+
+static const struct sysfs_ops xcp_cfg_res_sysfs_ops = {
+ .show = xcp_cfg_res_sysfs_attr_show,
+};
+
+static const struct kobj_type xcp_cfg_res_sysfs_ktype = {
+ .sysfs_ops = &xcp_cfg_res_sysfs_ops,
+ .default_groups = xcp_cfg_res_sysfs_groups,
+};
+
+const char *xcp_res_names[] = {
+ [AMDGPU_XCP_RES_XCC] = "xcc",
+ [AMDGPU_XCP_RES_DMA] = "dma",
+ [AMDGPU_XCP_RES_DEC] = "dec",
+ [AMDGPU_XCP_RES_JPEG] = "jpeg",
+};
+
+static int amdgpu_xcp_get_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg)
+{
+ if (xcp_mgr->funcs && xcp_mgr->funcs->get_xcp_res_info)
+ return xcp_mgr->funcs->get_xcp_res_info(xcp_mgr, mode, xcp_cfg);
+
+ return -EOPNOTSUPP;
+}
+
+#define to_xcp_cfg(x) container_of(x, struct amdgpu_xcp_cfg, kobj)
+static ssize_t supported_xcp_configs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ struct amdgpu_xcp_mgr *xcp_mgr = xcp_cfg->xcp_mgr;
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_mgr || !xcp_mgr->supp_xcp_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t supported_nps_configs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_cfg || !xcp_cfg->compatible_nps_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_cfg->compatible_nps_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t xcp_config_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ amdgpu_gfx_compute_mode_desc(xcp_cfg->mode));
+}
+
+static ssize_t xcp_config_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ int mode, r;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX")))
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ else if (!strncasecmp("DPX", buf, strlen("DPX")))
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ else if (!strncasecmp("TPX", buf, strlen("TPX")))
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ else if (!strncasecmp("QPX", buf, strlen("QPX")))
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ else if (!strncasecmp("CPX", buf, strlen("CPX")))
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ else
+ return -EINVAL;
+
+ r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+
+ if (r)
+ return r;
+
+ xcp_cfg->mode = mode;
+ return size;
+}
+
+static struct kobj_attribute xcp_cfg_sysfs_mode =
+ __ATTR_RW_MODE(xcp_config, 0644);
+
+static void xcp_cfg_sysfs_release(struct kobject *kobj)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+
+ kfree(xcp_cfg);
+}
+
+static const struct kobj_type xcp_cfg_sysfs_ktype = {
+ .release = xcp_cfg_sysfs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static struct kobj_attribute supp_part_sysfs_mode =
+ __ATTR_RO(supported_xcp_configs);
+
+static struct kobj_attribute supp_nps_sysfs_mode =
+ __ATTR_RO(supported_nps_configs);
+
+static const struct attribute *xcp_attrs[] = {
+ &supp_part_sysfs_mode.attr,
+ &xcp_cfg_sysfs_mode.attr,
+ NULL,
+};
+
+static void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_res_details *xcp_res;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ int i, r, j, rid, mode;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ xcp_cfg = kzalloc(sizeof(*xcp_cfg), GFP_KERNEL);
+ if (!xcp_cfg)
+ return;
+ xcp_cfg->xcp_mgr = adev->xcp_mgr;
+
+ r = kobject_init_and_add(&xcp_cfg->kobj, &xcp_cfg_sysfs_ktype,
+ &adev->dev->kobj, "compute_partition_config");
+ if (r)
+ goto err1;
+
+ r = sysfs_create_files(&xcp_cfg->kobj, xcp_attrs);
+ if (r)
+ goto err1;
+
+ if (adev->gmc.supported_nps_modes != 0) {
+ r = sysfs_create_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ if (r) {
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ goto err1;
+ }
+ }
+
+ mode = (xcp_cfg->xcp_mgr->mode ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) ?
+ AMDGPU_SPX_PARTITION_MODE :
+ xcp_cfg->xcp_mgr->mode;
+ r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+ if (r) {
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ goto err1;
+ }
+
+ xcp_cfg->mode = mode;
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ rid = xcp_res->id;
+ r = kobject_init_and_add(&xcp_res->kobj,
+ &xcp_cfg_res_sysfs_ktype,
+ &xcp_cfg->kobj, "%s",
+ xcp_res_names[rid]);
+ if (r)
+ goto err;
+ }
+
+ adev->xcp_mgr->xcp_cfg = xcp_cfg;
+ return;
+err:
+ for (j = 0; j < i; j++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ kobject_put(&xcp_res->kobj);
+ }
+
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+err1:
+ kobject_put(&xcp_cfg->kobj);
+}
+
+static void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_res_details *xcp_res;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ int i;
+
+ if (!adev->xcp_mgr || !adev->xcp_mgr->xcp_cfg)
+ return;
+
+ xcp_cfg = adev->xcp_mgr->xcp_cfg;
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ kobject_put(&xcp_res->kobj);
+ }
+
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ kobject_put(&xcp_cfg->kobj);
+}
+
+/*====================== xcp sysfs - data entries ======================*/
+
+#define to_xcp(x) container_of(x, struct amdgpu_xcp, kobj)
+
+static ssize_t xcp_metrics_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp *xcp = to_xcp(kobj);
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ ssize_t size;
+
+ xcp_mgr = xcp->xcp_mgr;
+ size = amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, NULL);
+ if (size <= 0)
+ return size;
+
+ if (size > PAGE_SIZE)
+ return -ENOSPC;
+
+ return amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, buf);
+}
+
+static umode_t amdgpu_xcp_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct amdgpu_xcp *xcp = to_xcp(kobj);
+
+ if (!xcp || !xcp->valid)
+ return 0;
+
+ return attr->mode;
+}
+
+static struct kobj_attribute xcp_sysfs_metrics = __ATTR_RO(xcp_metrics);
+
+static struct attribute *amdgpu_xcp_attrs[] = {
+ &xcp_sysfs_metrics.attr,
+ NULL,
+};
+
+static const struct attribute_group amdgpu_xcp_attrs_group = {
+ .attrs = amdgpu_xcp_attrs,
+ .is_visible = amdgpu_xcp_attrs_is_visible
+};
+
+static const struct kobj_type xcp_sysfs_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void amdgpu_xcp_sysfs_entries_fini(struct amdgpu_xcp_mgr *xcp_mgr, int n)
+{
+ struct amdgpu_xcp *xcp;
+
+ for (n--; n >= 0; n--) {
+ xcp = &xcp_mgr->xcp[n];
+ if (!xcp->ddev || !xcp->valid)
+ continue;
+ sysfs_remove_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ kobject_put(&xcp->kobj);
+ }
+}
+
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_xcp *xcp;
+ int i, r;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ /* Redirect all IOCTLs to the primary device */
+ xcp = &xcp_mgr->xcp[i];
+ if (!xcp->ddev)
+ break;
+ r = kobject_init_and_add(&xcp->kobj, &xcp_sysfs_ktype,
+ &xcp->ddev->dev->kobj, "xcp");
+ if (r)
+ goto out;
+
+ r = sysfs_create_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ if (r)
+ goto out;
+ }
+
+ return;
+out:
+ kobject_put(&xcp->kobj);
+}
+
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_xcp *xcp;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ /* Redirect all IOCTLs to the primary device */
+ xcp = &xcp_mgr->xcp[i];
+ if (!xcp->ddev)
+ continue;
+ sysfs_update_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ }
+
+ return;
+}
+
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!adev->xcp_mgr)
+ return;
+
+ amdgpu_xcp_cfg_sysfs_init(adev);
+
+ return;
+}
+
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->xcp_mgr)
+ return;
+ amdgpu_xcp_sysfs_entries_fini(adev->xcp_mgr, MAX_XCP);
+ amdgpu_xcp_cfg_sysfs_fini(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
new file mode 100644
index 000000000000..1928d9e224fc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_XCP_H
+#define AMDGPU_XCP_H
+
+#include <linux/pci.h>
+#include <linux/xarray.h>
+
+#include "amdgpu_ctx.h"
+
+#define MAX_XCP 8
+
+#define AMDGPU_XCP_MODE_NONE -1
+#define AMDGPU_XCP_MODE_TRANS -2
+
+#define AMDGPU_XCP_FL_NONE 0
+#define AMDGPU_XCP_FL_LOCKED (1 << 0)
+
+#define AMDGPU_XCP_NO_PARTITION (~0)
+
+#define AMDGPU_XCP_OPS_KFD (1 << 0)
+
+struct amdgpu_fpriv;
+
+enum AMDGPU_XCP_IP_BLOCK {
+ AMDGPU_XCP_GFXHUB,
+ AMDGPU_XCP_GFX,
+ AMDGPU_XCP_SDMA,
+ AMDGPU_XCP_VCN,
+ AMDGPU_XCP_MAX_BLOCKS
+};
+
+enum AMDGPU_XCP_STATE {
+ AMDGPU_XCP_PREPARE_SUSPEND,
+ AMDGPU_XCP_SUSPEND,
+ AMDGPU_XCP_PREPARE_RESUME,
+ AMDGPU_XCP_RESUME,
+};
+
+enum amdgpu_xcp_res_id {
+ AMDGPU_XCP_RES_XCC,
+ AMDGPU_XCP_RES_DMA,
+ AMDGPU_XCP_RES_DEC,
+ AMDGPU_XCP_RES_JPEG,
+ AMDGPU_XCP_RES_MAX,
+};
+
+struct amdgpu_xcp_res_details {
+ enum amdgpu_xcp_res_id id;
+ u8 num_inst;
+ u8 num_shared;
+ struct kobject kobj;
+};
+
+struct amdgpu_xcp_cfg {
+ u8 mode;
+ struct amdgpu_xcp_res_details xcp_res[AMDGPU_XCP_RES_MAX];
+ u8 num_res;
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ struct kobject kobj;
+ u16 compatible_nps_modes;
+};
+
+struct amdgpu_xcp_ip_funcs {
+ int (*prepare_suspend)(void *handle, uint32_t inst_mask);
+ int (*suspend)(void *handle, uint32_t inst_mask);
+ int (*prepare_resume)(void *handle, uint32_t inst_mask);
+ int (*resume)(void *handle, uint32_t inst_mask);
+};
+
+struct amdgpu_xcp_ip {
+ struct amdgpu_xcp_ip_funcs *ip_funcs;
+ uint32_t inst_mask;
+
+ enum AMDGPU_XCP_IP_BLOCK ip_id;
+ bool valid;
+};
+
+struct amdgpu_xcp {
+ struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS];
+
+ uint8_t id;
+ uint8_t mem_id;
+ bool valid;
+ atomic_t ref_cnt;
+ struct drm_device *ddev;
+ struct drm_device *rdev;
+ struct drm_device *pdev;
+ struct drm_driver *driver;
+ struct drm_vma_offset_manager *vma_offset_manager;
+ struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ struct kobject kobj;
+ uint64_t unique_id;
+};
+
+struct amdgpu_xcp_mgr {
+ struct amdgpu_device *adev;
+ struct mutex xcp_lock;
+ struct amdgpu_xcp_mgr_funcs *funcs;
+
+ struct amdgpu_xcp xcp[MAX_XCP];
+ uint8_t num_xcps;
+ int8_t mode;
+
+ /* Used to determine KFD memory size limits per XCP */
+ unsigned int num_xcp_per_mem_partition;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ uint32_t supp_xcp_modes;
+ uint32_t avail_xcp_modes;
+};
+
+struct amdgpu_xcp_mgr_funcs {
+ int (*switch_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr, int mode,
+ int *num_xcps);
+ int (*query_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr);
+ int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip);
+ int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id);
+ int (*get_xcp_res_info)(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg);
+ int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+};
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
+int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr);
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance);
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask);
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent);
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev);
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv);
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity);
+int amdgpu_xcp_select_scheds(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds);
+void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr);
+int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev);
+int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev);
+
+static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr)
+ return 1;
+ else
+ return xcp_mgr->num_xcps;
+}
+
+static inline struct amdgpu_xcp *
+amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
+{
+ if (!xcp_mgr)
+ return NULL;
+
+ while (*from < MAX_XCP) {
+ if (xcp_mgr->xcp[*from].valid)
+ return &xcp_mgr->xcp[*from];
+ ++(*from);
+ }
+
+ return NULL;
+}
+
+#define for_each_xcp(xcp_mgr, xcp, i) \
+ for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
+ ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 659b385b27b5..aad530c46a9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -24,15 +24,27 @@
#include <linux/list.h>
#include "amdgpu.h"
#include "amdgpu_xgmi.h"
-#include "amdgpu_smu.h"
#include "amdgpu_ras.h"
#include "soc15.h"
#include "df/df_3_6_offset.h"
#include "xgmi/xgmi_4_0_0_smn.h"
#include "xgmi/xgmi_4_0_0_sh_mask.h"
+#include "xgmi/xgmi_6_1_0_sh_mask.h"
#include "wafl/wafl2_4_0_0_smn.h"
#include "wafl/wafl2_4_0_0_sh_mask.h"
+#include "amdgpu_reset.h"
+
+#define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c
+#define smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK 0x11a00218
+#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210
+#define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218
+
+#define XGMI_STATE_DISABLE 0xD1
+#define XGMI_STATE_LS0 0x81
+#define XGMI_LINK_ACTIVE 1
+#define XGMI_LINK_INACTIVE 0
+
static DEFINE_MUTEX(xgmi_mutex);
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
@@ -64,6 +76,85 @@ static const int wafl_pcs_err_status_reg_arct[] = {
smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000,
};
+static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x200000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x300000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x400000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x500000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x600000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x700000
+};
+
+static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x200000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x300000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x400000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x500000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x600000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x700000
+};
+
+static const int walf_pcs_err_status_reg_aldebaran[] = {
+ smnPCS_GOPX1_PCS_ERROR_STATUS,
+ smnPCS_GOPX1_PCS_ERROR_STATUS + 0x100000
+};
+
+static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
+ smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
+};
+
+static const int xgmi3x16_pcs_err_status_reg_v6_4[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000
+};
+
+static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
+};
+
+static const u64 xgmi_v6_4_0_mca_base_array[] = {
+ 0x11a09200,
+ 0x11b09200,
+};
+
+static const char *xgmi_v6_4_0_ras_error_code_ext[32] = {
+ [0x00] = "XGMI PCS DataLossErr",
+ [0x01] = "XGMI PCS TrainingErr",
+ [0x02] = "XGMI PCS FlowCtrlAckErr",
+ [0x03] = "XGMI PCS RxFifoUnderflowErr",
+ [0x04] = "XGMI PCS RxFifoOverflowErr",
+ [0x05] = "XGMI PCS CRCErr",
+ [0x06] = "XGMI PCS BERExceededErr",
+ [0x07] = "XGMI PCS TxMetaDataErr",
+ [0x08] = "XGMI PCS ReplayBufParityErr",
+ [0x09] = "XGMI PCS DataParityErr",
+ [0x0a] = "XGMI PCS ReplayFifoOverflowErr",
+ [0x0b] = "XGMI PCS ReplayFifoUnderflowErr",
+ [0x0c] = "XGMI PCS ElasticFifoOverflowErr",
+ [0x0d] = "XGMI PCS DeskewErr",
+ [0x0e] = "XGMI PCS FlowCtrlCRCErr",
+ [0x0f] = "XGMI PCS DataStartupLimitErr",
+ [0x10] = "XGMI PCS FCInitTimeoutErr",
+ [0x11] = "XGMI PCS RecoveryTimeoutErr",
+ [0x12] = "XGMI PCS ReadySerialTimeoutErr",
+ [0x13] = "XGMI PCS ReadySerialAttemptErr",
+ [0x14] = "XGMI PCS RecoveryAttemptErr",
+ [0x15] = "XGMI PCS RecoveryRelockAttemptErr",
+ [0x16] = "XGMI PCS ReplayAttemptErr",
+ [0x17] = "XGMI PCS SyncHdrErr",
+ [0x18] = "XGMI PCS TxReplayTimeoutErr",
+ [0x19] = "XGMI PCS RxReplayTimeoutErr",
+ [0x1a] = "XGMI PCS LinkSubTxTimeoutErr",
+ [0x1b] = "XGMI PCS LinkSubRxTimeoutErr",
+ [0x1c] = "XGMI PCS RxCMDPktErr",
+};
+
static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = {
{"XGMI PCS DataLossErr",
SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)},
@@ -142,6 +233,143 @@ static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
};
+static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {
+ {"XGMI3X16 PCS DataLossErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataLossErr)},
+ {"XGMI3X16 PCS TrainingErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TrainingErr)},
+ {"XGMI3X16 PCS FlowCtrlAckErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlAckErr)},
+ {"XGMI3X16 PCS RxFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoUnderflowErr)},
+ {"XGMI3X16 PCS RxFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoOverflowErr)},
+ {"XGMI3X16 PCS CRCErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, CRCErr)},
+ {"XGMI3X16 PCS BERExceededErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, BERExceededErr)},
+ {"XGMI3X16 PCS TxVcidDataErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxVcidDataErr)},
+ {"XGMI3X16 PCS ReplayBufParityErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayBufParityErr)},
+ {"XGMI3X16 PCS DataParityErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataParityErr)},
+ {"XGMI3X16 PCS ReplayFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+ {"XGMI3X16 PCS ReplayFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+ {"XGMI3X16 PCS ElasticFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+ {"XGMI3X16 PCS DeskewErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DeskewErr)},
+ {"XGMI3X16 PCS FlowCtrlCRCErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlCRCErr)},
+ {"XGMI3X16 PCS DataStartupLimitErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataStartupLimitErr)},
+ {"XGMI3X16 PCS FCInitTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+ {"XGMI3X16 PCS RecoveryTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+ {"XGMI3X16 PCS ReadySerialTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+ {"XGMI3X16 PCS ReadySerialAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+ {"XGMI3X16 PCS RecoveryAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+ {"XGMI3X16 PCS RecoveryRelockAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+ {"XGMI3X16 PCS ReplayAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayAttemptErr)},
+ {"XGMI3X16 PCS SyncHdrErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, SyncHdrErr)},
+ {"XGMI3X16 PCS TxReplayTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxReplayTimeoutErr)},
+ {"XGMI3X16 PCS RxReplayTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxReplayTimeoutErr)},
+ {"XGMI3X16 PCS LinkSubTxTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubTxTimeoutErr)},
+ {"XGMI3X16 PCS LinkSubRxTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubRxTimeoutErr)},
+ {"XGMI3X16 PCS RxCMDPktErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)},
+};
+
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num)
+{
+ int link_map_6_4_x[8] = { 0, 3, 1, 2, 7, 6, 4, 5 };
+
+ if (adev->gmc.xgmi.num_physical_nodes <= 1)
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ if (link_num < ARRAY_SIZE(link_map_6_4_x))
+ return link_map_6_4_x[link_num];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
+
+static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+ const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 };
+ const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070,
+ 0x11b00070 };
+ u32 i, n;
+ u64 addr;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1);
+ addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n];
+ break;
+ case IP_VERSION(6, 4, 1):
+ n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1);
+ addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n];
+ break;
+ default:
+ return U32_MAX;
+ }
+
+ i = global_link_num / n;
+
+ if (!(adev->aid_mask & BIT(i)))
+ return U32_MAX;
+
+ addr += adev->asic_funcs->encode_ext_smn_addressing(i);
+
+ return RREG32_PCIE_EXT(addr);
+}
+
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+ u32 xgmi_state_reg_val;
+
+ if (adev->gmc.xgmi.num_physical_nodes <= 1)
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_DISABLE)
+ return -ENOLINK;
+
+ if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_LS0)
+ return XGMI_LINK_ACTIVE;
+
+ return XGMI_LINK_INACTIVE;
+}
+
/**
* DOC: AMDGPU XGMI Support
*
@@ -178,6 +406,7 @@ static struct attribute *amdgpu_xgmi_hive_attrs[] = {
&amdgpu_xgmi_hive_id,
NULL
};
+ATTRIBUTE_GROUPS(amdgpu_xgmi_hive);
static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj,
struct attribute *attr, char *buf)
@@ -196,6 +425,9 @@ static void amdgpu_xgmi_hive_release(struct kobject *kobj)
struct amdgpu_hive_info *hive = container_of(
kobj, struct amdgpu_hive_info, kobj);
+ amdgpu_reset_put_reset_domain(hive->reset_domain);
+ hive->reset_domain = NULL;
+
mutex_destroy(&hive->hive_lock);
kfree(hive);
}
@@ -204,10 +436,10 @@ static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
.show = amdgpu_xgmi_show_attrs,
};
-struct kobj_type amdgpu_xgmi_hive_type = {
+static const struct kobj_type amdgpu_xgmi_hive_type = {
.release = amdgpu_xgmi_hive_release,
.sysfs_ops = &amdgpu_xgmi_hive_ops,
- .default_attrs = amdgpu_xgmi_hive_attrs,
+ .default_groups = amdgpu_xgmi_hive_groups,
};
static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
@@ -217,10 +449,86 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.xgmi.node_id);
+ return sysfs_emit(buf, "%llu\n", adev->gmc.xgmi.node_id);
}
+static ssize_t amdgpu_xgmi_show_physical_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%u\n", adev->gmc.xgmi.physical_node_id);
+
+}
+
+static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0; i < top->num_nodes; i++)
+ sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops);
+
+ return sysfs_emit(buf, "%s\n", buf);
+}
+
+static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0; i < top->num_nodes; i++)
+ sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links);
+
+ return sysfs_emit(buf, "%s\n", buf);
+}
+
+static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i, j, size = 0;
+ int current_node;
+ /*
+ * get the node id in the sysfs for the current socket and show
+ * it in the port num info output in the sysfs for easy reading.
+ * it is NOT the one retrieved from xgmi ta.
+ */
+ for (i = 0; i < top->num_nodes; i++) {
+ if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) {
+ current_node = i;
+ break;
+ }
+ }
+
+ if (i == top->num_nodes)
+ return -EINVAL;
+
+ for (i = 0; i < top->num_nodes; i++) {
+ for (j = 0; j < top->nodes[i].num_links; j++)
+ /* node id in sysfs starts from 1 rather than 0 so +1 here */
+ size += sysfs_emit_at(buf, size, "%02x:%02x -> %02x:%02x\n", current_node + 1,
+ top->nodes[i].port_num[j].src_xgmi_port_num, i + 1,
+ top->nodes[i].port_num[j].dst_xgmi_port_num);
+ }
+
+ return size;
+}
+
#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
static ssize_t amdgpu_xgmi_show_error(struct device *dev,
struct device_attribute *attr,
@@ -235,6 +543,11 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
+ if ((!adev->df.funcs) ||
+ (!adev->df.funcs->get_fica) ||
+ (!adev->df.funcs->set_fica))
+ return -EINVAL;
+
fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
if (fica_out != 0x1f)
pr_err("xGMI error counters not enabled!\n");
@@ -246,12 +559,16 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
- return snprintf(buf, PAGE_SIZE, "%u\n", error_count);
+ return sysfs_emit(buf, "%u\n", error_count);
}
static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
+static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL);
static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
+static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
+static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
+static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL);
static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
@@ -266,11 +583,33 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
return ret;
}
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_physical_id);
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create device file xgmi_physical_id\n");
+ return ret;
+ }
+
/* Create xgmi error file */
ret = device_create_file(adev->dev, &dev_attr_xgmi_error);
if (ret)
pr_err("failed to create xgmi_error\n");
+ /* Create xgmi num hops file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_hops);
+ if (ret)
+ pr_err("failed to create xgmi_num_hops\n");
+
+ /* Create xgmi num links file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (ret)
+ pr_err("failed to create xgmi_num_links\n");
+
+ /* Create xgmi port num file if supported */
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num);
+ if (ret)
+ dev_err(adev->dev, "failed to create xgmi_port_num\n");
+ }
/* Create sysfs link to hive info folder on the first device */
if (hive->kobj.parent != (&adev->dev->kobj)) {
@@ -298,6 +637,12 @@ remove_link:
remove_file:
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_physical_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+ device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
success:
return ret;
@@ -310,7 +655,12 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
memset(node, 0, sizeof(node));
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_physical_id);
device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+ device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
if (hive->kobj.parent != (&adev->dev->kobj))
sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
@@ -345,6 +695,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
hive = kzalloc(sizeof(*hive), GFP_KERNEL);
if (!hive) {
dev_err(adev->dev, "XGMI: allocation failed\n");
+ ret = -ENOMEM;
hive = NULL;
goto pro_end;
}
@@ -356,20 +707,49 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
"%s", "xgmi_hive_info");
if (ret) {
dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
- kfree(hive);
+ kobject_put(&hive->kobj);
hive = NULL;
goto pro_end;
}
+ /**
+ * Only init hive->reset_domain for none SRIOV configuration. For SRIOV,
+ * Host driver decide how to reset the GPU either through FLR or chain reset.
+ * Guest side will get individual notifications from the host for the FLR
+ * if necessary.
+ */
+ if (!amdgpu_sriov_vf(adev)) {
+ /**
+ * Avoid recreating reset domain when hive is reconstructed for the case
+ * of reset the devices in the XGMI hive during probe for passthrough GPU
+ * See https://www.spinics.net/lists/amd-gfx/msg58836.html
+ */
+ if (adev->reset_domain->type != XGMI_HIVE) {
+ hive->reset_domain =
+ amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");
+ if (!hive->reset_domain) {
+ dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
+ ret = -ENOMEM;
+ kobject_put(&hive->kobj);
+ hive = NULL;
+ goto pro_end;
+ }
+ } else {
+ amdgpu_reset_get_reset_domain(adev->reset_domain);
+ hive->reset_domain = adev->reset_domain;
+ }
+ }
+
hive->hive_id = adev->gmc.xgmi.hive_id;
INIT_LIST_HEAD(&hive->device_list);
INIT_LIST_HEAD(&hive->node);
mutex_init(&hive->hive_lock);
- atomic_set(&hive->in_reset, 0);
atomic_set(&hive->number_devices, 0);
task_barrier_init(&hive->tb);
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
hive->hi_req_gpu = NULL;
+ atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+
/*
* hive pstate on boot is high in vega20 so we have to go to low
* pstate on after boot.
@@ -454,6 +834,9 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
{
int ret;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,
atomic_read(&hive->number_devices),
@@ -468,16 +851,121 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
}
+/*
+ * NOTE psp_xgmi_node_info.num_hops layout is as follows:
+ * num_hops[7:6] = link type (0 = xGMI2, 1 = xGMI3, 2/3 = reserved)
+ * num_hops[5:3] = reserved
+ * num_hops[2:0] = number of hops
+ */
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev)
+ struct amdgpu_device *peer_adev)
{
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ uint8_t num_hops_mask = 0x7;
int i;
+ if (!adev->gmc.xgmi.supported)
+ return 0;
+
for (i = 0 ; i < top->num_nodes; ++i)
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
- return top->nodes[i].num_hops;
- return -EINVAL;
+ return top->nodes[i].num_hops & num_hops_mask;
+
+ dev_err(adev->dev, "Failed to get xgmi hops count for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+
+ return 0;
+}
+
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw)
+{
+ bool peer_mode = bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER;
+ int unit_scale = bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES ? 1000 : 1;
+ int num_lanes = adev->gmc.xgmi.max_width;
+ int speed = adev->gmc.xgmi.max_speed;
+ int num_links = !peer_mode ? 1 : -1;
+
+ if (!(min_bw && max_bw))
+ return -EINVAL;
+
+ *min_bw = 0;
+ *max_bw = 0;
+
+ if (!adev->gmc.xgmi.supported)
+ return -ENODATA;
+
+ if (peer_mode && !peer_adev)
+ return -EINVAL;
+
+ if (peer_mode) {
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i) {
+ if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id)
+ continue;
+
+ num_links = top->nodes[i].num_links;
+ break;
+ }
+ }
+
+ if (num_links == -1) {
+ dev_err(adev->dev, "Failed to get number of xgmi links for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+ } else if (num_links) {
+ int per_link_bw = (speed * num_lanes * unit_scale)/BITS_PER_BYTE;
+
+ *min_bw = per_link_bw;
+ *max_bw = num_links * per_link_bw;
+ }
+
+ return 0;
+}
+
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ /* Sharing should always be enabled for non-SRIOV. */
+ if (!amdgpu_sriov_vf(adev))
+ return true;
+
+ for (i = 0 ; i < top->num_nodes; ++i)
+ if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+ return !!top->nodes[i].is_sharing_enabled;
+
+ return false;
+}
+
+/*
+ * Devices that support extended data require the entire hive to initialize with
+ * the shared memory buffer flag set.
+ *
+ * Hive locks and conditions apply - see amdgpu_xgmi_add_device
+ */
+static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_info *hive,
+ bool set_extended_data)
+{
+ struct amdgpu_device *tmp_adev;
+ int ret;
+
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_initialize(&tmp_adev->psp, set_extended_data, false);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Failed to initialize xgmi session for data partition %i\n",
+ set_extended_data);
+ return ret;
+ }
+
+ }
+
+ return 0;
}
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
@@ -493,7 +981,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
return 0;
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
- ret = psp_xgmi_initialize(&adev->psp);
+ ret = psp_xgmi_initialize(&adev->psp, false, true);
if (ret) {
dev_err(adev->dev,
"XGMI: Failed to initialize xgmi session\n");
@@ -552,18 +1040,61 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit_unlock;
}
- /* get latest topology info for each device from psp */
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
- &tmp_adev->psp.xgmi_context.top_info);
+ if (amdgpu_sriov_vf(adev) &&
+ adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+ /* only get topology for VF being init if it can support full duplex */
+ ret = psp_xgmi_get_topology_info(&adev->psp, count,
+ &adev->psp.xgmi_context.top_info, false);
if (ret) {
- dev_err(tmp_adev->dev,
+ dev_err(adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
- tmp_adev->gmc.xgmi.node_id,
- tmp_adev->gmc.xgmi.hive_id, ret);
- /* To do : continue with some node failed or disable the whole hive */
+ adev->gmc.xgmi.node_id,
+ adev->gmc.xgmi.hive_id, ret);
+ /* To do: continue with some node failed or disable the whole hive*/
goto exit_unlock;
}
+ } else {
+ /* get latest topology info for each device from psp */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info, false);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ /* To do : continue with some node failed or disable the whole hive */
+ goto exit_unlock;
+ }
+ }
+ }
+
+ /* get topology again for hives that support extended data */
+ if (adev->psp.xgmi_context.supports_extended_data) {
+
+ /* initialize the hive to get extended data. */
+ ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, true);
+ if (ret)
+ goto exit_unlock;
+
+ /* get the extended data. */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info, true);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ goto exit_unlock;
+ }
+ }
+
+ /* initialize the hive to get non-extended data for the next round. */
+ ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, false);
+ if (ret)
+ goto exit_unlock;
+
}
}
@@ -617,57 +1148,94 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
amdgpu_put_xgmi_hive(hive);
}
- return psp_xgmi_terminate(&adev->psp);
+ return 0;
+}
+
+static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct amdgpu_device *adev = handle->adev;
+ struct aca_bank_info info;
+ const char *error_str;
+ u64 status, count;
+ int ret, ext_error_code;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ status = bank->regs[ACA_REG_IDX_STATUS];
+ ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+
+ error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ?
+ xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL;
+ if (error_str)
+ dev_info(adev->dev, "%s detected\n", error_str);
+
+ count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ if (ext_error_code != 0 && ext_error_code != 9)
+ count = 0ULL;
+
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count);
+ break;
+ case ACA_SMU_TYPE_CE:
+ count = ext_error_code == 6 ? count : 0ULL;
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
}
-int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = {
+ .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser,
+};
+
+static const struct aca_info xgmi_v6_4_0_aca_info = {
+ .hwip = ACA_HWIP_TYPE_PCS_XGMI,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
+ .bank_ops = &xgmi_v6_4_0_aca_bank_ops,
+};
+
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "xgmi_wafl_err_count",
- };
if (!adev->gmc.xgmi.supported ||
adev->gmc.xgmi.num_physical_nodes == 0)
return 0;
- amdgpu_xgmi_reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
- if (!adev->gmc.xgmi.ras_if) {
- adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->gmc.xgmi.ras_if)
- return -ENOMEM;
- adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
- adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->gmc.xgmi.ras_if->sub_block_index = 0;
- strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
- }
- ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
- r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
- &fs_info, &ih_info);
- if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
- kfree(adev->gmc.xgmi.ras_if);
- adev->gmc.xgmi.ras_if = NULL;
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL,
+ &xgmi_v6_4_0_aca_info, NULL);
+ if (r)
+ goto late_fini;
+ break;
+ default:
+ break;
}
- return r;
-}
+ return 0;
-void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
- adev->gmc.xgmi.ras_if) {
- struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
+ return r;
}
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
@@ -683,7 +1251,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg
WREG32_PCIE(pcs_status_reg, 0);
}
-void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev)
{
uint32_t i;
@@ -698,61 +1266,127 @@ void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
pcs_clear_status(adev,
xgmi_pcs_err_status_reg_vg20[i]);
break;
+ case CHIP_ALDEBARAN:
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++)
+ pcs_clear_status(adev,
+ xgmi3x16_pcs_err_status_reg_aldebaran[i]);
+ for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++)
+ pcs_clear_status(adev,
+ walf_pcs_err_status_reg_aldebaran[i]);
+ break;
default:
break;
}
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++)
+ pcs_clear_status(adev,
+ xgmi3x16_pcs_err_status_reg_v6_4[i]);
+ break;
+ default:
+ break;
+ }
+}
+
+static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base)
+{
+ WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+}
+
+static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++)
+ __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]);
+}
+
+static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i;
+
+ for_each_inst(i, adev->aid_mask)
+ xgmi_v6_4_0_reset_error_count(adev, i);
+}
+
+static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ xgmi_v6_4_0_reset_ras_error_count(adev);
+ break;
+ default:
+ amdgpu_xgmi_legacy_reset_ras_error_count(adev);
+ break;
+ }
}
static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
uint32_t value,
+ uint32_t mask_value,
uint32_t *ue_count,
uint32_t *ce_count,
- bool is_xgmi_pcs)
+ bool is_xgmi_pcs,
+ bool check_mask)
{
int i;
- int ue_cnt;
+ int ue_cnt = 0;
+ const struct amdgpu_pcs_ras_field *pcs_ras_fields = NULL;
+ uint32_t field_array_size = 0;
if (is_xgmi_pcs) {
- /* query xgmi pcs error status,
- * only ue is supported */
- for (i = 0; i < ARRAY_SIZE(xgmi_pcs_ras_fields); i ++) {
- ue_cnt = (value &
- xgmi_pcs_ras_fields[i].pcs_err_mask) >>
- xgmi_pcs_ras_fields[i].pcs_err_shift;
- if (ue_cnt) {
- dev_info(adev->dev, "%s detected\n",
- xgmi_pcs_ras_fields[i].err_name);
- *ue_count += ue_cnt;
- }
+ if (amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 1, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 1)) {
+ pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields);
+ } else {
+ pcs_ras_fields = &xgmi_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(xgmi_pcs_ras_fields);
}
} else {
- /* query wafl pcs error status,
- * only ue is supported */
- for (i = 0; i < ARRAY_SIZE(wafl_pcs_ras_fields); i++) {
- ue_cnt = (value &
- wafl_pcs_ras_fields[i].pcs_err_mask) >>
- wafl_pcs_ras_fields[i].pcs_err_shift;
- if (ue_cnt) {
- dev_info(adev->dev, "%s detected\n",
- wafl_pcs_ras_fields[i].err_name);
- *ue_count += ue_cnt;
- }
+ pcs_ras_fields = &wafl_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(wafl_pcs_ras_fields);
+ }
+
+ if (check_mask)
+ value = value & ~mask_value;
+
+ /* query xgmi/walf pcs error status,
+ * only ue is supported */
+ for (i = 0; value && i < field_array_size; i++) {
+ ue_cnt = (value &
+ pcs_ras_fields[i].pcs_err_mask) >>
+ pcs_ras_fields[i].pcs_err_shift;
+ if (ue_cnt) {
+ dev_info(adev->dev, "%s detected\n",
+ pcs_ras_fields[i].err_name);
+ *ue_count += ue_cnt;
}
+
+ /* reset bit value if the bit is checked */
+ value &= ~(pcs_ras_fields[i].pcs_err_mask);
}
return 0;
}
-int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
- void *ras_error_status)
+static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
- int i;
- uint32_t data;
+ int i, supported = 1;
+ uint32_t data, mask_data = 0;
uint32_t ue_cnt = 0, ce_cnt = 0;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
- return -EINVAL;
+ return ;
err_data->ue_count = 0;
err_data->ce_count = 0;
@@ -763,40 +1397,382 @@ int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++) {
data = RREG32_PCIE(xgmi_pcs_err_status_reg_arct[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, true);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, false);
}
/* check wafl pcs error */
for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_arct); i++) {
data = RREG32_PCIE(wafl_pcs_err_status_reg_arct[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, false);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, false);
}
break;
case CHIP_VEGA20:
- default:
/* check xgmi pcs error */
for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++) {
data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, true);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, false);
}
/* check wafl pcs error */
for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_vg20); i++) {
data = RREG32_PCIE(wafl_pcs_err_status_reg_vg20[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, false);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, false);
}
break;
+ case CHIP_ALDEBARAN:
+ /* check xgmi3x16 pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) {
+ data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]);
+ mask_data =
+ RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, true);
+ }
+ /* check wafl pcs error */
+ for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) {
+ data = RREG32_PCIE(walf_pcs_err_status_reg_aldebaran[i]);
+ mask_data =
+ RREG32_PCIE(walf_pcs_err_noncorrectable_mask_reg_aldebaran[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, true);
+ }
+ break;
+ default:
+ supported = 0;
+ break;
}
- amdgpu_xgmi_reset_ras_error_count(adev);
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ /* check xgmi3x16 pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) {
+ data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]);
+ mask_data =
+ RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, true);
+ }
+ break;
+ default:
+ if (!supported)
+ dev_warn(adev->dev, "XGMI RAS error query not supported");
+ break;
+ }
+
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
err_data->ue_count += ue_cnt;
err_data->ce_count += ce_cnt;
+}
+
+static enum aca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status)
+{
+ const char *error_str;
+ int ext_error_code;
+
+ ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+
+ error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ?
+ xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL;
+ if (error_str)
+ dev_info(adev->dev, "%s detected\n", error_str);
+
+ switch (ext_error_code) {
+ case 0:
+ return ACA_ERROR_TYPE_UE;
+ case 6:
+ return ACA_ERROR_TYPE_CE;
+ default:
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
+
+static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 mca_base, struct ras_err_data *err_data)
+{
+ int xgmi_inst = mcm_info->die_id;
+ u64 status = 0;
+
+ status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS);
+ if (!ACA_REG__STATUS__VAL(status))
+ return;
+
+ switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) {
+ case ACA_ERROR_TYPE_UE:
+ amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL);
+ break;
+ case ACA_ERROR_TYPE_CE:
+ amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL);
+ break;
+ default:
+ break;
+ }
+
+ WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+}
+
+static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data)
+{
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = xgmi_inst,
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++)
+ __xgmi_v6_4_0_query_error_count(adev, &mcm_info, xgmi_v6_4_0_mca_base_array[i], err_data);
+}
+
+static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ int i;
+
+ for_each_inst(i, adev->aid_mask)
+ xgmi_v6_4_0_query_error_count(adev, i, err_data);
+}
+
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status);
+ break;
+ default:
+ amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status);
+ break;
+ }
+}
+
+/* Trigger XGMI/WAFL error */
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask)
+{
+ int ret1, ret2;
+ struct ta_ras_trigger_error_input *block_info =
+ (struct ta_ras_trigger_error_input *)inject_if;
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DISALLOW);
+ if (ret1 && ret1 != -EOPNOTSUPP)
+ dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+ ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret2;
+
+ ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DEFAULT);
+ if (ret1 && ret1 != -EOPNOTSUPP)
+ dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+ dev_warn(adev->dev, "Failed to allow df cstate");
+
+ return ret2;
+}
+
+struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
+ .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
+ .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+ .ras_error_inject = amdgpu_ras_error_inject_xgmi,
+};
+
+struct amdgpu_xgmi_ras xgmi_ras = {
+ .ras_block = {
+ .hw_ops = &xgmi_ras_hw_ops,
+ .ras_late_init = amdgpu_xgmi_ras_late_init,
+ },
+};
+
+int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_xgmi_ras *ras;
+
+ if (!adev->gmc.xgmi.ras)
+ return 0;
+
+ ras = adev->gmc.xgmi.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register xgmi_wafl_pcs ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gmc.xgmi.ras_if = &ras->ras_block.ras_comm;
return 0;
}
+
+static void amdgpu_xgmi_reset_on_init_work(struct work_struct *work)
+{
+ struct amdgpu_hive_info *hive =
+ container_of(work, struct amdgpu_hive_info, reset_on_init_work);
+ struct amdgpu_reset_context reset_context;
+ struct amdgpu_device *tmp_adev;
+ struct list_head device_list;
+ int r;
+
+ mutex_lock(&hive->hive_lock);
+
+ INIT_LIST_HEAD(&device_list);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+
+ tmp_adev = list_first_entry(&device_list, struct amdgpu_device,
+ reset_list);
+ amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+
+ reset_context.method = AMD_RESET_METHOD_ON_INIT;
+ reset_context.reset_req_dev = tmp_adev;
+ reset_context.hive = hive;
+ reset_context.reset_device_list = &device_list;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ amdgpu_reset_do_xgmi_reset_on_init(&reset_context);
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ r = amdgpu_ras_init_badpage_info(tmp_adev);
+ if (r && r != -EHWPOISON)
+ dev_err(tmp_adev->dev,
+ "error during bad page data initialization");
+ }
+}
+
+static void amdgpu_xgmi_schedule_reset_on_init(struct amdgpu_hive_info *hive)
+{
+ INIT_WORK(&hive->reset_on_init_work, amdgpu_xgmi_reset_on_init_work);
+ amdgpu_reset_domain_schedule(hive->reset_domain,
+ &hive->reset_on_init_work);
+}
+
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive;
+ bool reset_scheduled;
+ int num_devs;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (!hive)
+ return -EINVAL;
+
+ mutex_lock(&hive->hive_lock);
+ num_devs = atomic_read(&hive->number_devices);
+ reset_scheduled = false;
+ if (num_devs == adev->gmc.xgmi.num_physical_nodes) {
+ amdgpu_xgmi_schedule_reset_on_init(hive);
+ reset_scheduled = true;
+ }
+
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+
+ if (reset_scheduled)
+ flush_work(&hive->reset_on_init_work);
+
+ return 0;
+}
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive,
+ int req_nps_mode)
+{
+ struct amdgpu_device *tmp_adev;
+ int cur_nps_mode, r;
+
+ /* This is expected to be called only during unload of driver. The
+ * request needs to be placed only once for all devices in the hive. If
+ * one of them fail, revert the request for previous successful devices.
+ * After placing the request, make hive mode as UNKNOWN so that other
+ * devices don't request anymore.
+ */
+ mutex_lock(&hive->hive_lock);
+ if (atomic_read(&hive->requested_nps_mode) ==
+ UNKNOWN_MEMORY_PARTITION_MODE) {
+ dev_dbg(adev->dev, "Unexpected entry for hive NPS change");
+ mutex_unlock(&hive->hive_lock);
+ return 0;
+ }
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ r = adev->gmc.gmc_funcs->request_mem_partition_mode(
+ tmp_adev, req_nps_mode);
+ if (r)
+ break;
+ }
+ if (r) {
+ /* Request back current mode if one of the requests failed */
+ cur_nps_mode =
+ adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);
+ list_for_each_entry_continue_reverse(
+ tmp_adev, &hive->device_list, gmc.xgmi.head)
+ adev->gmc.gmc_funcs->request_mem_partition_mode(
+ tmp_adev, cur_nps_mode);
+ }
+ /* Set to UNKNOWN so that other devices don't request anymore */
+ atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+ mutex_unlock(&hive->hive_lock);
+
+ return r;
+}
+
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev)
+{
+ return (amdgpu_use_xgmi_p2p && adev != bo_adev &&
+ adev->gmc.xgmi.hive_id &&
+ adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
+}
+
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.xgmi.supported)
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ /* 25 GT/s */
+ adev->gmc.xgmi.max_speed = 25;
+ adev->gmc.xgmi.max_width = 16;
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ /* 32 GT/s */
+ adev->gmc.xgmi.max_speed = 32;
+ adev->gmc.xgmi.max_width = 16;
+ break;
+ default:
+ break;
+ }
+}
+
+void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
+ uint16_t max_speed, uint8_t max_width)
+{
+ adev->gmc.xgmi.max_speed = max_speed;
+ adev->gmc.xgmi.max_width = max_width;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 148560d63554..5f36aff17e79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -23,8 +23,7 @@
#define __AMDGPU_XGMI_H__
#include <drm/task_barrier.h>
-#include "amdgpu_psp.h"
-
+#include "amdgpu_ras.h"
struct amdgpu_hive_info {
struct kobject kobj;
@@ -33,7 +32,6 @@ struct amdgpu_hive_info {
struct list_head node;
atomic_t number_devices;
struct mutex hive_lock;
- atomic_t in_reset;
int hi_req_count;
struct amdgpu_device *hi_req_gpu;
struct task_barrier tb;
@@ -42,6 +40,12 @@ struct amdgpu_hive_info {
AMDGPU_XGMI_PSTATE_MAX_VEGA20,
AMDGPU_XGMI_PSTATE_UNKNOWN
} pstate;
+
+ struct amdgpu_reset_domain *reset_domain;
+ atomic_t ras_recovery;
+ struct ras_event_manager event_mgr;
+ struct work_struct reset_on_init_work;
+ atomic_t requested_nps_mode;
};
struct amdgpu_pcs_ras_field {
@@ -50,28 +54,80 @@ struct amdgpu_pcs_ras_field {
uint32_t pcs_err_shift;
};
+/**
+ * Bandwidth range reporting comes in two modes.
+ *
+ * PER_LINK - range for any xgmi link
+ * PER_PEER - range of max of single xgmi link to max of multiple links based on source peer
+ */
+enum amdgpu_xgmi_bw_mode {
+ AMDGPU_XGMI_BW_MODE_PER_LINK = 0,
+ AMDGPU_XGMI_BW_MODE_PER_PEER
+};
+
+enum amdgpu_xgmi_bw_unit {
+ AMDGPU_XGMI_BW_UNIT_GBYTES = 0,
+ AMDGPU_XGMI_BW_UNIT_MBYTES
+};
+
+struct amdgpu_xgmi_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+extern struct amdgpu_xgmi_ras xgmi_ras;
+
+struct amdgpu_xgmi {
+ /* from psp */
+ u64 node_id;
+ u64 hive_id;
+ /* fixed per family */
+ u64 node_segment_size;
+ /* physical node (0-3) */
+ unsigned physical_node_id;
+ /* number of nodes (0-4) */
+ unsigned num_physical_nodes;
+ /* gpu list in the same hive */
+ struct list_head head;
+ bool supported;
+ struct ras_common_if *ras_if;
+ bool connected_to_cpu;
+ struct amdgpu_xgmi_ras *ras;
+ uint16_t max_speed;
+ uint8_t max_width;
+};
+
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
-int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev);
-int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw);
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
uint64_t addr);
-int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
- void *ras_error_status);
-void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev);
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev);
+int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive,
+ int req_nps_mode);
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
+ int global_link_num);
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num);
+
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev);
+uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev);
+
+void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
+ uint16_t max_speed, uint8_t max_width);
-static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev)
-{
- return (adev != bo_adev &&
- adev->gmc.xgmi.hive_id &&
- adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
-}
+/* Cleanup macro for use with __free(xgmi_put_hive) */
+DEFINE_FREE(xgmi_put_hive, struct amdgpu_hive_info *, if (_T) amdgpu_put_xgmi_hive(_T))
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 5355827ed0ae..3cdb1e0eca37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -1,45 +1,106 @@
/*
- * Copyright 2018-2019 Advanced Micro Devices, Inc.
+ * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved.
*
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*/
#ifndef AMDGV_SRIOV_MSG__H_
#define AMDGV_SRIOV_MSG__H_
-/* unit in kilobytes */
-#define AMD_SRIOV_MSG_VBIOS_OFFSET 0
-#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
-#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
-#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
+#define AMD_SRIOV_MSG_SIZE_KB 1
/*
- * layout
- * 0 64KB 65KB 66KB
- * | VBIOS | PF2VF | VF2PF | Bad Page | ...
- * | 64KB | 1KB | 1KB |
+ * layout v1
+ * 0 64KB 65KB 66KB 68KB 132KB
+ * | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 64KB | 1KB | 1KB | 2KB | 64KB | ...
*/
-#define AMD_SRIOV_MSG_SIZE_KB 1
-#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
-#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
+
+/*
+ * layout v2 (offsets are dynamically allocated and the offsets below are examples)
+ * 0 1KB 64KB 65KB 66KB 68KB 132KB
+ * | INITD_H | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 1KB | 64KB | 1KB | 1KB | 2KB | 64KB | ...
+ *
+ * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
+ */
+
+/* v1 layout sizes */
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1 2
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+
+/* v1 offsets */
+#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1 0
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1 AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
+#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
+#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
+
+enum amd_sriov_crit_region_version {
+ GPU_CRIT_REGION_V1 = 1,
+ GPU_CRIT_REGION_V2 = 2,
+};
+
+/* v2 layout offset enum (in order of allocation) */
+enum amd_sriov_msg_table_id_enum {
+ AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
+ AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
+ AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
+ AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
+ AMD_SRIOV_MSG_INITD_H_TABLE_ID,
+ AMD_SRIOV_MSG_MAX_TABLE_ID,
+};
+
+struct amd_sriov_msg_init_data_header {
+ char signature[4]; /* "INDA" */
+ uint32_t version;
+ uint32_t checksum;
+ uint32_t initdata_offset; /* 0 */
+ uint32_t initdata_size_in_kb; /* 5MB */
+ uint32_t valid_tables;
+ uint32_t vbios_img_offset;
+ uint32_t vbios_img_size_in_kb;
+ uint32_t dataexchange_offset;
+ uint32_t dataexchange_size_in_kb;
+ uint32_t ras_tele_info_offset;
+ uint32_t ras_tele_info_size_in_kb;
+ uint32_t ip_discovery_offset;
+ uint32_t ip_discovery_size_in_kb;
+ uint32_t bad_page_info_offset;
+ uint32_t bad_page_size_in_kb;
+ uint32_t reserved[8];
+};
/*
* PF2VF history log:
@@ -51,10 +112,12 @@
* v2 defined in amdgim
* v3 current
*/
-#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
-#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3
+#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
+#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3
+
+#define AMD_SRIOV_MSG_RESERVE_UCODE 24
-#define AMD_SRIOV_MSG_RESERVE_UCODE 24
+#define AMD_SRIOV_MSG_RESERVE_VCN_INST 4
enum amd_sriov_ucode_engine_id {
AMD_SRIOV_UCODE_ID_VCE = 0,
@@ -81,26 +144,105 @@ enum amd_sriov_ucode_engine_id {
AMD_SRIOV_UCODE_ID__MAX
};
-#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed
+#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed
union amd_sriov_msg_feature_flags {
struct {
- uint32_t error_log_collect : 1;
- uint32_t host_load_ucodes : 1;
- uint32_t host_flr_vramlost : 1;
- uint32_t mm_bw_management : 1;
- uint32_t pp_one_vf_mode : 1;
- uint32_t reserved : 27;
+ uint32_t error_log_collect : 1;
+ uint32_t host_load_ucodes : 1;
+ uint32_t host_flr_vramlost : 1;
+ uint32_t mm_bw_management : 1;
+ uint32_t pp_one_vf_mode : 1;
+ uint32_t reg_indirect_acc : 1;
+ uint32_t av1_support : 1;
+ uint32_t vcn_rb_decouple : 1;
+ uint32_t mes_info_dump_enable : 1;
+ uint32_t ras_caps : 1;
+ uint32_t ras_telemetry : 1;
+ uint32_t ras_cper : 1;
+ uint32_t xgmi_ta_ext_peer_link : 1;
+ uint32_t reserved : 19;
} flags;
- uint32_t all;
+ uint32_t all;
+};
+
+union amd_sriov_reg_access_flags {
+ struct {
+ uint32_t vf_reg_access_ih : 1;
+ uint32_t vf_reg_access_mmhub : 1;
+ uint32_t vf_reg_access_gc : 1;
+ uint32_t vf_reg_access_l1_tlb_cntl : 1;
+ uint32_t vf_reg_access_sq_config : 1;
+ uint32_t reserved : 27;
+ } flags;
+ uint32_t all;
+};
+
+union amd_sriov_ras_caps {
+ struct {
+ uint64_t block_umc : 1;
+ uint64_t block_sdma : 1;
+ uint64_t block_gfx : 1;
+ uint64_t block_mmhub : 1;
+ uint64_t block_athub : 1;
+ uint64_t block_pcie_bif : 1;
+ uint64_t block_hdp : 1;
+ uint64_t block_xgmi_wafl : 1;
+ uint64_t block_df : 1;
+ uint64_t block_smn : 1;
+ uint64_t block_sem : 1;
+ uint64_t block_mp0 : 1;
+ uint64_t block_mp1 : 1;
+ uint64_t block_fuse : 1;
+ uint64_t block_mca : 1;
+ uint64_t block_vcn : 1;
+ uint64_t block_jpeg : 1;
+ uint64_t block_ih : 1;
+ uint64_t block_mpio : 1;
+ uint64_t block_mmsch : 1;
+ uint64_t poison_propogation_mode : 1;
+ uint64_t reserved : 43;
+ } bits;
+ uint64_t all;
};
union amd_sriov_msg_os_info {
struct {
- uint32_t windows : 1;
- uint32_t reserved : 31;
+ uint32_t windows : 1;
+ uint32_t reserved : 31;
} info;
- uint32_t all;
+ uint32_t all;
+};
+
+struct amd_sriov_msg_uuid_info {
+ union {
+ struct {
+ uint32_t did : 16;
+ uint32_t fcn : 8;
+ uint32_t asic_7 : 8;
+ };
+ uint32_t time_low;
+ };
+
+ struct {
+ uint32_t time_mid : 16;
+ uint32_t time_high : 12;
+ uint32_t version : 4;
+ };
+
+ struct {
+ struct {
+ uint8_t clk_seq_hi : 6;
+ uint8_t variant : 2;
+ };
+ union {
+ uint8_t clk_seq_low;
+ uint8_t asic_6;
+ };
+ uint16_t asic_4;
+ };
+
+ uint32_t asic_0;
};
struct amd_sriov_msg_pf2vf_info_header {
@@ -112,6 +254,7 @@ struct amd_sriov_msg_pf2vf_info_header {
uint32_t reserved[2];
};
+#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
struct amd_sriov_msg_pf2vf_info {
/* header contains size and version */
struct amd_sriov_msg_pf2vf_info_header header;
@@ -149,9 +292,30 @@ struct amd_sriov_msg_pf2vf_info {
/* identification in ROCm SMI */
uint64_t uuid;
uint32_t fcn_idx;
+ /* flags to indicate which register access method VF should use */
+ union amd_sriov_reg_access_flags reg_access_flags;
+ /* MM BW management */
+ struct {
+ uint32_t decode_max_dimension_pixels;
+ uint32_t decode_max_frame_pixels;
+ uint32_t encode_max_dimension_pixels;
+ uint32_t encode_max_frame_pixels;
+ } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
+ /* UUID info */
+ struct amd_sriov_msg_uuid_info uuid_info;
+ /* PCIE atomic ops support flag */
+ uint32_t pcie_atomic_ops_support_flags;
+ /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
+ uint32_t gpu_capacity;
+ /* vf bdf on host pci tree for debug only */
+ uint32_t bdf_on_host;
+ uint32_t more_bp; //Reserved for future use.
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+
/* reserved */
- uint32_t reserved[256-26];
-};
+ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
+} __packed;
struct amd_sriov_msg_vf2pf_info_header {
/* the total structure size in byte */
@@ -162,12 +326,13 @@ struct amd_sriov_msg_vf2pf_info_header {
uint32_t reserved[2];
};
+#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73)
struct amd_sriov_msg_vf2pf_info {
/* header contains size and version */
struct amd_sriov_msg_vf2pf_info_header header;
uint32_t checksum;
/* driver version */
- uint8_t driver_version[64];
+ uint8_t driver_version[64];
/* driver certification, 1=WHQL, 0=None */
uint32_t driver_cert;
/* guest OS type and version */
@@ -201,13 +366,16 @@ struct amd_sriov_msg_vf2pf_info {
uint32_t fb_size;
/* guest ucode data, each one is 1.25 Dword */
struct {
- uint8_t id;
+ uint8_t id;
uint32_t version;
} ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
-
+ uint64_t dummy_page_addr;
+ /* FB allocated for guest MES to record UQ info */
+ uint64_t mes_info_addr;
+ uint32_t mes_info_size;
/* reserved */
- uint32_t reserved[256-68];
-};
+ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
+} __packed;
/* mailbox message send from guest to host */
enum amd_sriov_mailbox_request_message {
@@ -217,22 +385,98 @@ enum amd_sriov_mailbox_request_message {
MB_REQ_MSG_REL_GPU_FINI_ACCESS,
MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
MB_REQ_MSG_REQ_GPU_INIT_DATA,
+ MB_REQ_MSG_PSP_VF_CMD_RELAY,
- MB_REQ_MSG_LOG_VF_ERROR = 200,
+ MB_REQ_MSG_LOG_VF_ERROR = 200,
+ MB_REQ_MSG_READY_TO_RESET = 201,
+ MB_REQ_MSG_RAS_POISON = 202,
+ MB_REQ_RAS_ERROR_COUNT = 203,
+ MB_REQ_RAS_CPER_DUMP = 204,
+ MB_REQ_RAS_BAD_PAGES = 205,
};
/* mailbox message send from host to guest */
enum amd_sriov_mailbox_response_message {
- MB_RES_MSG_CLR_MSG_BUF = 0,
- MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
- MB_RES_MSG_FLR_NOTIFICATION,
- MB_RES_MSG_FLR_NOTIFICATION_COMPLETION,
- MB_RES_MSG_SUCCESS,
- MB_RES_MSG_FAIL,
- MB_RES_MSG_QUERY_ALIVE,
- MB_RES_MSG_GPU_INIT_DATA_READY,
-
- MB_RES_MSG_TEXT_MESSAGE = 255
+ MB_RES_MSG_CLR_MSG_BUF = 0,
+ MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
+ MB_RES_MSG_FLR_NOTIFICATION = 2,
+ MB_RES_MSG_FLR_NOTIFICATION_COMPLETION = 3,
+ MB_RES_MSG_SUCCESS = 4,
+ MB_RES_MSG_FAIL = 5,
+ MB_RES_MSG_QUERY_ALIVE = 6,
+ MB_RES_MSG_GPU_INIT_DATA_READY = 7,
+ MB_RES_MSG_RAS_POISON_READY = 8,
+ MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION = 9,
+ MB_RES_MSG_GPU_RMA = 10,
+ MB_RES_MSG_RAS_ERROR_COUNT_READY = 11,
+ MB_REQ_RAS_CPER_DUMP_READY = 14,
+ MB_RES_MSG_RAS_BAD_PAGES_READY = 15,
+ MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16,
+ MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17,
+ MB_RES_MSG_TEXT_MESSAGE = 255
+};
+
+enum amd_sriov_ras_telemetry_gpu_block {
+ RAS_TELEMETRY_GPU_BLOCK_UMC = 0,
+ RAS_TELEMETRY_GPU_BLOCK_SDMA = 1,
+ RAS_TELEMETRY_GPU_BLOCK_GFX = 2,
+ RAS_TELEMETRY_GPU_BLOCK_MMHUB = 3,
+ RAS_TELEMETRY_GPU_BLOCK_ATHUB = 4,
+ RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF = 5,
+ RAS_TELEMETRY_GPU_BLOCK_HDP = 6,
+ RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL = 7,
+ RAS_TELEMETRY_GPU_BLOCK_DF = 8,
+ RAS_TELEMETRY_GPU_BLOCK_SMN = 9,
+ RAS_TELEMETRY_GPU_BLOCK_SEM = 10,
+ RAS_TELEMETRY_GPU_BLOCK_MP0 = 11,
+ RAS_TELEMETRY_GPU_BLOCK_MP1 = 12,
+ RAS_TELEMETRY_GPU_BLOCK_FUSE = 13,
+ RAS_TELEMETRY_GPU_BLOCK_MCA = 14,
+ RAS_TELEMETRY_GPU_BLOCK_VCN = 15,
+ RAS_TELEMETRY_GPU_BLOCK_JPEG = 16,
+ RAS_TELEMETRY_GPU_BLOCK_IH = 17,
+ RAS_TELEMETRY_GPU_BLOCK_MPIO = 18,
+ RAS_TELEMETRY_GPU_BLOCK_COUNT = 19,
+};
+
+struct amd_sriov_ras_telemetry_header {
+ uint32_t checksum;
+ uint32_t used_size;
+ uint32_t reserved[2];
+};
+
+struct amd_sriov_ras_telemetry_error_count {
+ struct {
+ uint32_t ce_count;
+ uint32_t ue_count;
+ uint32_t de_count;
+ uint32_t ce_overflow_count;
+ uint32_t ue_overflow_count;
+ uint32_t de_overflow_count;
+ uint32_t reserved[6];
+ } block[RAS_TELEMETRY_GPU_BLOCK_COUNT];
+};
+
+struct amd_sriov_ras_cper_dump {
+ uint32_t more;
+ uint64_t overflow_count;
+ uint64_t count;
+ uint64_t wptr;
+ uint32_t buf[];
+};
+
+struct amd_sriov_ras_chk_criti {
+ uint32_t hit;
+};
+
+struct amdsriov_ras_telemetry {
+ struct amd_sriov_ras_telemetry_header header;
+
+ union {
+ struct amd_sriov_ras_telemetry_error_count error_count;
+ struct amd_sriov_ras_cper_dump cper_dump;
+ struct amd_sriov_ras_chk_criti chk_criti;
+ } body;
};
/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
@@ -240,17 +484,15 @@ enum amd_sriov_gpu_init_data_version {
GPU_INIT_DATA_READY_V1 = 1,
};
-#pragma pack(pop) // Restore previous packing option
+#pragma pack(pop) // Restore previous packing option
/* checksum function between host and guest */
-unsigned int amd_sriov_msg_checksum(void *obj,
- unsigned long obj_size,
- unsigned int key,
- unsigned int checksum);
+unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key,
+ unsigned int checksum);
/* assertion at compile time */
#ifdef __linux__
-#define stringification(s) _stringification(s)
+#define stringification(s) _stringification(s)
#define _stringification(s) #s
_Static_assert(
@@ -261,13 +503,11 @@ _Static_assert(
sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
"amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
-_Static_assert(
- AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
- "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
-_Static_assert(
- AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
- "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
#undef _stringification
#undef stringification
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
new file mode 100644
index 000000000000..f9e2edf5260b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -0,0 +1,986 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "amdgpu_reg_state.h"
+#include "amdgpu_xcp.h"
+#include "gfx_v9_4_3.h"
+#include "gfxhub_v1_2.h"
+#include "sdma_v4_4_2.h"
+#include "amdgpu_ip.h"
+
+#define XCP_INST_MASK(num_inst, xcp_id) \
+ (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
+
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->doorbell_index.kiq = AMDGPU_DOORBELL_LAYOUT1_KIQ_START;
+
+ adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START;
+
+ adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END;
+ adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE;
+
+ adev->doorbell_index.sdma_doorbell_range = 20;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->doorbell_index.sdma_engine[i] =
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START +
+ i * (adev->doorbell_index.sdma_doorbell_range >> 1);
+
+ adev->doorbell_index.ih = AMDGPU_DOORBELL_LAYOUT1_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL_LAYOUT1_VCN_START;
+
+ adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
+}
+
+/* Fixed pattern for smn addressing on different AIDs:
+ * bit[34]: indicate cross AID access
+ * bit[33:32]: indicate target AID id
+ * AID id range is 0 ~ 3 as maximum AID number is 4.
+ */
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
+{
+ u64 ext_offset;
+
+ /* local routing and bit[34:32] will be zeros */
+ if (ext_id == 0)
+ return 0;
+
+ /* Initiated from host, accessing to all non-zero aids are cross traffic */
+ ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34);
+
+ return ext_offset;
+}
+
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xcc_per_xcp = 0, mode = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+ if (adev->gfx.funcs->get_xccs_per_xcp)
+ num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
+ if ((num_xcc_per_xcp) && (num_xcc % num_xcc_per_xcp == 0))
+ mode = num_xcc / num_xcc_per_xcp;
+
+ if (num_xcc_per_xcp == 1)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ switch (mode) {
+ case 1:
+ return AMDGPU_SPX_PARTITION_MODE;
+ case 2:
+ return AMDGPU_DPX_PARTITION_MODE;
+ case 3:
+ return AMDGPU_TPX_PARTITION_MODE;
+ case 4:
+ return AMDGPU_QPX_PARTITION_MODE;
+ default:
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ }
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ enum amdgpu_gfx_partition derv_mode,
+ mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ derv_mode = __aqua_vanjaram_calc_xcp_mode(xcp_mgr);
+
+ if (amdgpu_sriov_vf(adev))
+ return derv_mode;
+
+ if (adev->nbio.funcs->get_compute_partition_mode) {
+ mode = adev->nbio.funcs->get_compute_partition_mode(adev);
+ if (mode != derv_mode) {
+ dev_warn(
+ adev->dev,
+ "Mismatch in compute partition mode - reported : %d derived : %d",
+ mode, derv_mode);
+ if (derv_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ amdgpu_device_bus_status_check(adev);
+ }
+ }
+
+ return mode;
+}
+
+static int __aqua_vanjaram_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ int num_xcc, num_xcc_per_xcp = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc;
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 2;
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 3;
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 4;
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_xcc_per_xcp = 1;
+ break;
+ }
+
+ return num_xcc_per_xcp;
+}
+
+static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_sdma, num_vcn, num_shared_vcn, num_xcp;
+ int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
+
+ num_sdma = adev->sdma.num_instances;
+ num_vcn = adev->vcn.num_vcn_inst;
+ num_shared_vcn = 1;
+
+ num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+ num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp;
+
+ switch (xcp_mgr->mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ case AMDGPU_DPX_PARTITION_MODE:
+ case AMDGPU_TPX_PARTITION_MODE:
+ case AMDGPU_QPX_PARTITION_MODE:
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp);
+ num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (num_vcn && num_xcp > num_vcn)
+ num_shared_vcn = num_xcp / num_vcn;
+
+ switch (ip_id) {
+ case AMDGPU_XCP_GFXHUB:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfxhub_v1_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_GFX:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfx_v9_4_3_xcp_funcs;
+ break;
+ case AMDGPU_XCP_SDMA:
+ ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id);
+ ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_VCN:
+ ip->inst_mask =
+ XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn);
+ /* TODO : Assign IP funcs */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ip->ip_id = ip_id;
+
+ return 0;
+}
+
+static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int px_mode, int *num_xcp,
+ uint16_t *nps_modes)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+ if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode)))
+ return -EINVAL;
+
+ switch (px_mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ *num_xcp = 1;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE);
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ *num_xcp = 2;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ *num_xcp = 3;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ *num_xcp = 4;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ if (gc_ver == IP_VERSION(9, 5, 0))
+ *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ *num_xcp = NUM_XCC(adev->gfx.xcc_mask);
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ if (gc_ver == IP_VERSION(9, 5, 0))
+ *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int max_res[AMDGPU_XCP_RES_MAX] = {};
+ bool res_lt_xcp;
+ int num_xcp, i, r;
+ u16 nps_modes;
+
+ if (!(xcp_mgr->supp_xcp_modes & BIT(mode)))
+ return -EINVAL;
+
+ max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask);
+ max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances;
+ max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst;
+ max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst;
+
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, &nps_modes);
+ if (r)
+ return r;
+
+ xcp_cfg->compatible_nps_modes =
+ (adev->gmc.supported_nps_modes & nps_modes);
+ xcp_cfg->num_res = ARRAY_SIZE(max_res);
+
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ res_lt_xcp = max_res[i] < num_xcp;
+ xcp_cfg->xcp_res[i].id = i;
+ xcp_cfg->xcp_res[i].num_inst =
+ res_lt_xcp ? 1 : max_res[i] / num_xcp;
+ xcp_cfg->xcp_res[i].num_inst =
+ i == AMDGPU_XCP_RES_JPEG ?
+ xcp_cfg->xcp_res[i].num_inst *
+ adev->jpeg.num_jpeg_rings : xcp_cfg->xcp_res[i].num_inst;
+ xcp_cfg->xcp_res[i].num_shared =
+ res_lt_xcp ? num_xcp / max_res[i] : 1;
+ }
+
+ return 0;
+}
+
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ if (adev->gmc.num_mem_partitions == 1)
+ return AMDGPU_SPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc / 2)
+ return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
+ AMDGPU_CPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
+ return AMDGPU_DPX_PARTITION_MODE;
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum amdgpu_gfx_partition mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xccs_per_xcp, r;
+ int num_xcp, nps_mode;
+ u16 supp_nps_modes;
+ bool comp_mode;
+
+ nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp,
+ &supp_nps_modes);
+ if (r)
+ return false;
+
+ comp_mode = !!(BIT(nps_mode) & supp_nps_modes);
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return comp_mode && num_xcc > 0;
+ case AMDGPU_DPX_PARTITION_MODE:
+ return comp_mode && (num_xcc % 4) == 0;
+ case AMDGPU_TPX_PARTITION_MODE:
+ return comp_mode && ((num_xcc % 3) == 0);
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xccs_per_xcp = num_xcc / 4;
+ return comp_mode && (num_xccs_per_xcp >= 2);
+ case AMDGPU_CPX_PARTITION_MODE:
+ return comp_mode && (num_xcc > 1);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static void __aqua_vanjaram_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ int mode;
+
+ xcp_mgr->avail_xcp_modes = 0;
+
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ if (__aqua_vanjaram_is_valid_mode(xcp_mgr, mode))
+ xcp_mgr->avail_xcp_modes |= BIT(mode);
+ }
+}
+
+static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode, int *num_xcps)
+{
+ int num_xcc_per_xcp, num_xcc, ret;
+ struct amdgpu_device *adev;
+ u32 flags = 0;
+
+ adev = xcp_mgr->adev;
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
+ mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
+ if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) {
+ dev_err(adev->dev,
+ "Invalid config, no compatible compute partition mode found, available memory partitions: %d",
+ adev->gmc.num_mem_partitions);
+ return -EINVAL;
+ }
+ } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
+ dev_err(adev->dev,
+ "Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
+ amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions);
+ return -EINVAL;
+ }
+
+ if (adev->kfd.init_complete && !amdgpu_in_reset(adev) &&
+ !adev->in_suspend)
+ flags |= AMDGPU_XCP_OPS_KFD;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
+ if (ret)
+ goto out;
+ }
+
+ ret = amdgpu_xcp_pre_partition_switch(xcp_mgr, flags);
+ if (ret)
+ goto unlock;
+
+ num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode);
+ if (adev->gfx.funcs->switch_partition_mode)
+ adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev,
+ num_xcc_per_xcp);
+
+ /* Init info about new xcps */
+ *num_xcps = num_xcc / num_xcc_per_xcp;
+ amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
+
+ ret = amdgpu_xcp_post_partition_switch(xcp_mgr, flags);
+ if (!ret)
+ __aqua_vanjaram_update_available_partition_mode(xcp_mgr);
+unlock:
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_unlock_kfd(adev);
+out:
+ return ret;
+}
+
+static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev,
+ int xcc_id, uint8_t *mem_id)
+{
+ /* memory/spatial modes validation check is already done */
+ *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp;
+ *mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition;
+
+ return 0;
+}
+
+static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id)
+{
+ struct amdgpu_numa_info numa_info;
+ struct amdgpu_device *adev;
+ uint32_t xcc_mask;
+ int r, i, xcc_id;
+
+ adev = xcp_mgr->adev;
+ /* TODO: BIOS is not returning the right info now
+ * Check on this later
+ */
+ /*
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ */
+ if (adev->gmc.num_mem_partitions == 1) {
+ /* Only one range */
+ *mem_id = 0;
+ return 0;
+ }
+
+ r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
+ if (r || !xcc_mask)
+ return -EINVAL;
+
+ xcc_id = ffs(xcc_mask) - 1;
+ if (!adev->gmc.is_app_apu)
+ return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id);
+
+ r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+
+ if (r)
+ return r;
+
+ r = -EINVAL;
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
+ *mem_id = i;
+ r = 0;
+ break;
+ }
+ }
+
+ return r;
+}
+
+static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ if (!ip)
+ return -EINVAL;
+
+ return __aqua_vanjaram_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip);
+}
+
+struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
+ .switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
+ .query_partition_mode = &aqua_vanjaram_query_partition_mode,
+ .get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
+ .get_xcp_res_info = &aqua_vanjaram_get_xcp_res_info,
+ .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
+};
+
+static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ aqua_vanjaram_xcp_funcs.switch_partition_mode = NULL;
+
+ ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1,
+ &aqua_vanjaram_xcp_funcs);
+ if (ret)
+ return ret;
+
+ amdgpu_xcp_update_supported_modes(adev->xcp_mgr);
+ /* TODO: Default memory node affinity init */
+
+ return ret;
+}
+
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
+{
+ u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask;
+ int ret, i;
+
+ /* generally 1 AID supports 4 instances */
+ adev->sdma.num_inst_per_aid = 4;
+ adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask);
+
+ adev->aid_mask = i = 1;
+ inst_mask >>= adev->sdma.num_inst_per_aid;
+
+ for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
+ inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
+ avail_inst = inst_mask & mask;
+ if (avail_inst == mask || avail_inst == 0x3 ||
+ avail_inst == 0xc)
+ adev->aid_mask |= (1 << i);
+ }
+
+ /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be
+ * addressed based on logical instance ids.
+ */
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_inst_per_aid = 1;
+ adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask);
+ adev->jpeg.harvest_config = 0;
+ adev->jpeg.num_inst_per_aid = 1;
+ adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask);
+
+ ret = aqua_vanjaram_xcp_mgr_init(adev);
+ if (ret)
+ return ret;
+
+ amdgpu_ip_map_init(adev);
+
+ return 0;
+}
+
+static void aqua_read_smn(struct amdgpu_device *adev,
+ struct amdgpu_smn_reg_data *regdata,
+ uint64_t smn_addr)
+{
+ regdata->addr = smn_addr;
+ regdata->value = RREG32_PCIE(smn_addr);
+}
+
+struct aqua_reg_list {
+ uint64_t start_addr;
+ uint32_t num_regs;
+ uint32_t incrx;
+};
+
+#define DW_ADDR_INCR 4
+
+static void aqua_read_smn_ext(struct amdgpu_device *adev,
+ struct amdgpu_smn_reg_data *regdata,
+ uint64_t smn_addr, int i)
+{
+ regdata->addr =
+ smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i);
+ regdata->value = RREG32_PCIE_EXT(regdata->addr);
+}
+
+#define smnreg_0x1A340218 0x1A340218
+#define smnreg_0x1A3402E4 0x1A3402E4
+#define smnreg_0x1A340294 0x1A340294
+#define smreg_0x1A380088 0x1A380088
+
+#define NUM_PCIE_SMN_REGS 14
+
+static struct aqua_reg_list pcie_reg_addrs[] = {
+ { smnreg_0x1A340218, 1, 0 },
+ { smnreg_0x1A3402E4, 1, 0 },
+ { smnreg_0x1A340294, 6, DW_ADDR_INCR },
+ { smreg_0x1A380088, 6, DW_ADDR_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_pcie_v1_0 *pcie_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ struct pci_dev *us_pdev, *ds_pdev;
+ int aer_cap, r, n;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf;
+
+ szbuf = sizeof(*pcie_reg_state) +
+ amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS);
+ /* Only one instance of pcie regs */
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf +
+ sizeof(*pcie_reg_state));
+ pcie_regs->inst_header.instance = 0;
+ pcie_regs->inst_header.state = AMDGPU_INST_S_OK;
+ pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS;
+
+ reg_data = pcie_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) {
+ start_addr = pcie_reg_addrs[r].start_addr;
+ incrx = pcie_reg_addrs[r].incrx;
+ num_regs = pcie_reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn(adev, reg_data, start_addr + n * incrx);
+ ++reg_data;
+ }
+ }
+
+ ds_pdev = pci_upstream_bridge(adev->pdev);
+ us_pdev = pci_upstream_bridge(ds_pdev);
+
+ pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA,
+ &pcie_regs->device_status);
+ pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA,
+ &pcie_regs->link_status);
+
+ aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR);
+ if (aer_cap) {
+ pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS,
+ &pcie_regs->pcie_corr_err_status);
+ pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS,
+ &pcie_regs->pcie_uncorr_err_status);
+ }
+
+ pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS,
+ &pcie_regs->sub_bus_number_latency);
+
+ pcie_reg_state->common_header.structure_size = szbuf;
+ pcie_reg_state->common_header.format_revision = 1;
+ pcie_reg_state->common_header.content_revision = 0;
+ pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE;
+ pcie_reg_state->common_header.num_instances = 1;
+
+ return pcie_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11A00050 0x11A00050
+#define smnreg_0x11A00180 0x11A00180
+#define smnreg_0x11A00070 0x11A00070
+#define smnreg_0x11A00200 0x11A00200
+#define smnreg_0x11A0020C 0x11A0020C
+#define smnreg_0x11A00210 0x11A00210
+#define smnreg_0x11A00108 0x11A00108
+
+#define XGMI_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_XGMI_SMN_REGS 25
+
+static struct aqua_reg_list xgmi_reg_addrs[] = {
+ { smnreg_0x11A00050, 1, 0 },
+ { smnreg_0x11A00180, 16, DW_ADDR_INCR },
+ { smnreg_0x11A00070, 4, DW_ADDR_INCR },
+ { smnreg_0x11A00200, 1, 0 },
+ { smnreg_0x11A0020C, 1, 0 },
+ { smnreg_0x11A00210, 1, 0 },
+ { smnreg_0x11A00108, 1, 0 },
+};
+
+static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_xgmi_v1_0 *xgmi_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_xgmi_instances = 8;
+ int inst = 0, i, j, r, n;
+ const int xgmi_inst = 2;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf;
+
+ szbuf = sizeof(*xgmi_reg_state) +
+ amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs),
+ NUM_XGMI_SMN_REGS);
+ /* Only one instance of pcie regs */
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &xgmi_reg_state->xgmi_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ for (j = 0; j < xgmi_inst; ++j) {
+ xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p;
+ xgmi_regs->inst_header.instance = inst++;
+
+ xgmi_regs->inst_header.state = AMDGPU_INST_S_OK;
+ xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS;
+
+ reg_data = xgmi_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) {
+ start_addr = xgmi_reg_addrs[r].start_addr;
+ incrx = xgmi_reg_addrs[r].incrx;
+ num_regs = xgmi_reg_addrs[r].num_regs;
+
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(
+ adev, reg_data,
+ XGMI_LINK_REG(start_addr, j) +
+ n * incrx,
+ i);
+ ++reg_data;
+ }
+ }
+ p = reg_data;
+ }
+ }
+
+ xgmi_reg_state->common_header.structure_size = szbuf;
+ xgmi_reg_state->common_header.format_revision = 1;
+ xgmi_reg_state->common_header.content_revision = 0;
+ xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI;
+ xgmi_reg_state->common_header.num_instances = max_xgmi_instances;
+
+ return xgmi_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11C00070 0x11C00070
+#define smnreg_0x11C00210 0x11C00210
+
+static struct aqua_reg_list wafl_reg_addrs[] = {
+ { smnreg_0x11C00070, 4, DW_ADDR_INCR },
+ { smnreg_0x11C00210, 1, 0 },
+};
+
+#define WAFL_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_WAFL_SMN_REGS 5
+
+static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_wafl_v1_0 *wafl_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_wafl_instances = 8;
+ int inst = 0, i, j, r, n;
+ const int wafl_inst = 2;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf;
+
+ szbuf = sizeof(*wafl_reg_state) +
+ amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs),
+ NUM_WAFL_SMN_REGS);
+
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &wafl_reg_state->wafl_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ for (j = 0; j < wafl_inst; ++j) {
+ wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p;
+ wafl_regs->inst_header.instance = inst++;
+
+ wafl_regs->inst_header.state = AMDGPU_INST_S_OK;
+ wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS;
+
+ reg_data = wafl_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) {
+ start_addr = wafl_reg_addrs[r].start_addr;
+ incrx = wafl_reg_addrs[r].incrx;
+ num_regs = wafl_reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(
+ adev, reg_data,
+ WAFL_LINK_REG(start_addr, j) +
+ n * incrx,
+ i);
+ ++reg_data;
+ }
+ }
+ p = reg_data;
+ }
+ }
+
+ wafl_reg_state->common_header.structure_size = szbuf;
+ wafl_reg_state->common_header.format_revision = 1;
+ wafl_reg_state->common_header.content_revision = 0;
+ wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL;
+ wafl_reg_state->common_header.num_instances = max_wafl_instances;
+
+ return wafl_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x1B311060 0x1B311060
+#define smnreg_0x1B411060 0x1B411060
+#define smnreg_0x1B511060 0x1B511060
+#define smnreg_0x1B611060 0x1B611060
+
+#define smnreg_0x1C307120 0x1C307120
+#define smnreg_0x1C317120 0x1C317120
+
+#define smnreg_0x1C320830 0x1C320830
+#define smnreg_0x1C380830 0x1C380830
+#define smnreg_0x1C3D0830 0x1C3D0830
+#define smnreg_0x1C420830 0x1C420830
+
+#define smnreg_0x1C320100 0x1C320100
+#define smnreg_0x1C380100 0x1C380100
+#define smnreg_0x1C3D0100 0x1C3D0100
+#define smnreg_0x1C420100 0x1C420100
+
+#define smnreg_0x1B310500 0x1B310500
+#define smnreg_0x1C300400 0x1C300400
+
+#define USR_CAKE_INCR 0x11000
+#define USR_LINK_INCR 0x100000
+#define USR_CP_INCR 0x10000
+
+#define NUM_USR_SMN_REGS 20
+
+struct aqua_reg_list usr_reg_addrs[] = {
+ { smnreg_0x1B311060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B411060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B511060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B611060, 4, DW_ADDR_INCR },
+ { smnreg_0x1C307120, 2, DW_ADDR_INCR },
+ { smnreg_0x1C317120, 2, DW_ADDR_INCR },
+};
+
+#define NUM_USR1_SMN_REGS 46
+struct aqua_reg_list usr1_reg_addrs[] = {
+ { smnreg_0x1C320830, 6, USR_CAKE_INCR },
+ { smnreg_0x1C380830, 5, USR_CAKE_INCR },
+ { smnreg_0x1C3D0830, 5, USR_CAKE_INCR },
+ { smnreg_0x1C420830, 4, USR_CAKE_INCR },
+ { smnreg_0x1C320100, 6, USR_CAKE_INCR },
+ { smnreg_0x1C380100, 5, USR_CAKE_INCR },
+ { smnreg_0x1C3D0100, 5, USR_CAKE_INCR },
+ { smnreg_0x1C420100, 4, USR_CAKE_INCR },
+ { smnreg_0x1B310500, 4, USR_LINK_INCR },
+ { smnreg_0x1C300400, 2, USR_CP_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size,
+ int reg_state)
+{
+ uint32_t start_addr, incrx, num_regs, szbuf, num_smn;
+ struct amdgpu_reg_state_usr_v1_0 *usr_reg_state;
+ struct amdgpu_regs_usr_v1_0 *usr_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_usr_instances = 4;
+ struct aqua_reg_list *reg_addrs;
+ int inst = 0, i, n, r, arr_size;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ switch (reg_state) {
+ case AMDGPU_REG_STATE_TYPE_USR:
+ arr_size = ARRAY_SIZE(usr_reg_addrs);
+ reg_addrs = usr_reg_addrs;
+ num_smn = NUM_USR_SMN_REGS;
+ break;
+ case AMDGPU_REG_STATE_TYPE_USR_1:
+ arr_size = ARRAY_SIZE(usr1_reg_addrs);
+ reg_addrs = usr1_reg_addrs;
+ num_smn = NUM_USR1_SMN_REGS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf;
+
+ szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances,
+ sizeof(*usr_regs),
+ num_smn);
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &usr_reg_state->usr_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ usr_regs = (struct amdgpu_regs_usr_v1_0 *)p;
+ usr_regs->inst_header.instance = inst++;
+ usr_regs->inst_header.state = AMDGPU_INST_S_OK;
+ usr_regs->inst_header.num_smn_regs = num_smn;
+ reg_data = usr_regs->smn_reg_values;
+
+ for (r = 0; r < arr_size; r++) {
+ start_addr = reg_addrs[r].start_addr;
+ incrx = reg_addrs[r].incrx;
+ num_regs = reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(adev, reg_data,
+ start_addr + n * incrx, i);
+ reg_data++;
+ }
+ }
+ p = reg_data;
+ }
+
+ usr_reg_state->common_header.structure_size = szbuf;
+ usr_reg_state->common_header.format_revision = 1;
+ usr_reg_state->common_header.content_revision = 0;
+ usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR;
+ usr_reg_state->common_header.num_instances = max_usr_instances;
+
+ return usr_reg_state->common_header.structure_size;
+}
+
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size)
+{
+ ssize_t size;
+
+ switch (reg_state) {
+ case AMDGPU_REG_STATE_TYPE_PCIE:
+ size = aqua_vanjaram_read_pcie_state(adev, buf, max_size);
+ break;
+ case AMDGPU_REG_STATE_TYPE_XGMI:
+ size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size);
+ break;
+ case AMDGPU_REG_STATE_TYPE_WAFL:
+ size = aqua_vanjaram_read_wafl_state(adev, buf, max_size);
+ break;
+ case AMDGPU_REG_STATE_TYPE_USR:
+ size = aqua_vanjaram_read_usr_state(adev, buf, max_size,
+ AMDGPU_REG_STATE_TYPE_USR);
+ break;
+ case AMDGPU_REG_STATE_TYPE_USR_1:
+ size = aqua_vanjaram_read_usr_state(
+ adev, buf, max_size, AMDGPU_REG_STATE_TYPE_USR_1);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return size;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
index 3ea557864320..42f4e163e251 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -68,12 +68,13 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(1, 5, 0):
athub_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
athub_update_medium_grain_light_sleep(adev,
@@ -86,7 +87,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
index b279af59e34f..6be0a6704ea7 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
@@ -25,6 +25,6 @@
int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
index 5b90efd6f6d0..5a122f50a6e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -36,9 +36,12 @@ athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ return;
+
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ if (enable)
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -53,10 +56,13 @@ athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
{
uint32_t def, data;
+ if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)))
+ return;
+
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
- (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ if (enable)
data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
@@ -71,10 +77,10 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(1, 3, 1):
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
athub_v2_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
athub_v2_0_update_medium_grain_light_sleep(adev,
@@ -87,7 +93,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
index 02932c1c8bab..8b763f6dfd81 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
@@ -25,6 +25,6 @@
int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
index 7b1b18350bf9..e143fcc46148 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
@@ -70,14 +70,13 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- athub_v2_1_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- athub_v2_1_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ case IP_VERSION(2, 4, 0):
+ athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE);
+ athub_v2_1_update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -86,7 +85,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
index 5e6824c0f591..b799f14bce03 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
@@ -25,6 +25,6 @@
int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
new file mode 100644
index 000000000000..d1bba9c64e16
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v3_0.h"
+#include "athub/athub_3_0_0_offset.h"
+#include "athub/athub_3_0_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
+#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+#define regATHUB_MISC_CNTL_V3_3_0 0x00d8
+#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0
+
+
+static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
+ break;
+ case IP_VERSION(3, 3, 0):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0);
+ break;
+ default:
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ }
+ return data;
+}
+
+static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
+ break;
+ case IP_VERSION(3, 3, 0):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data);
+ break;
+ default:
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ }
+}
+
+static void
+athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v3_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ athub_v3_0_set_cg_cntl(adev, data);
+}
+
+static void
+athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v3_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ athub_v3_0_set_cg_cntl(adev, data);
+}
+
+int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 3, 0):
+ athub_v3_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v3_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = athub_v3_0_get_cg_cntl(adev);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h
new file mode 100644
index 000000000000..e08a7d564365
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V3_0_H__
+#define __ATHUB_V3_0_H__
+
+int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c
new file mode 100644
index 000000000000..8a0773b80864
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v4_1_0.h"
+#include "athub/athub_4_1_0_offset.h"
+#include "athub/athub_4_1_0_sh_mask.h"
+#include "soc15_common.h"
+
+static uint32_t athub_v4_1_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ default:
+ data = 0;
+ break;
+ }
+ return data;
+}
+
+static void athub_v4_1_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+athub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v4_1_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ athub_v4_1_0_set_cg_cntl(adev, data);
+}
+
+static void
+athub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v4_1_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ athub_v4_1_0_set_cg_cntl(adev, data);
+}
+
+int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ athub_v4_1_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v4_1_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = athub_v4_1_0_get_cg_cntl(adev);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h
new file mode 100644
index 000000000000..4d18d0998fa8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V4_1_0_H__
+#define __ATHUB_V4_1_0_H__
+
+int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 515890f4f5a0..7a063e44d429 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -25,12 +25,15 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
-#include <asm/unaligned.h>
+#include <linux/string_helpers.h>
+
+#include <linux/unaligned.h>
#include <drm/drm_util.h>
#define ATOM_DEBUG
+#include "atomfirmware.h"
#include "atom.h"
#include "atom-names.h"
#include "atom-bits.h"
@@ -59,6 +62,7 @@
typedef struct {
struct atom_context *ctx;
uint32_t *ps, *ws;
+ int ps_size, ws_size;
int ps_shift;
uint16_t start;
unsigned last_jump;
@@ -67,8 +71,8 @@ typedef struct {
} atom_exec_context;
int amdgpu_atom_debug;
-static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params);
-int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
static uint32_t atom_arg_mask[8] =
{ 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
@@ -114,11 +118,11 @@ static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
base++;
break;
case ATOM_IIO_READ:
- temp = ctx->card->ioreg_read(ctx->card, CU16(base + 1));
+ temp = ctx->card->reg_read(ctx->card, CU16(base + 1));
base += 3;
break;
case ATOM_IIO_WRITE:
- ctx->card->ioreg_write(ctx->card, CU16(base + 1), temp);
+ ctx->card->reg_write(ctx->card, CU16(base + 1), temp);
base += 3;
break;
case ATOM_IIO_CLEAR:
@@ -220,7 +224,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr)++;
/* get_unaligned_le32 avoids unaligned accesses from atombios
* tables, noticed on a DEC Alpha. */
- val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
+ if (idx < ctx->ps_size)
+ val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
+ else
+ pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
if (print)
DEBUG("PS[0x%02X,0x%04X]", idx, val);
break;
@@ -258,7 +265,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
val = gctx->reg_block;
break;
default:
- val = ctx->ws[idx];
+ if (idx < ctx->ws_size)
+ val = ctx->ws[idx];
+ else
+ pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
}
break;
case ATOM_ARG_ID:
@@ -291,7 +301,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr) += 4;
if (print)
DEBUG("IMM 0x%08X\n", val);
- return val;
+ break;
case ATOM_SRC_WORD0:
case ATOM_SRC_WORD8:
case ATOM_SRC_WORD16:
@@ -299,7 +309,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr) += 2;
if (print)
DEBUG("IMM 0x%04X\n", val);
- return val;
+ break;
case ATOM_SRC_BYTE0:
case ATOM_SRC_BYTE8:
case ATOM_SRC_BYTE16:
@@ -308,9 +318,9 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr)++;
if (print)
DEBUG("IMM 0x%02X\n", val);
- return val;
+ break;
}
- return 0;
+ return val;
case ATOM_ARG_PLL:
idx = U8(*ptr);
(*ptr)++;
@@ -392,7 +402,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
(*ptr)++;
return;
}
- return;
}
}
@@ -493,6 +502,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
idx = U8(*ptr);
(*ptr)++;
DEBUG("PS[0x%02X]", idx);
+ if (idx >= ctx->ps_size) {
+ pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
+ return;
+ }
ctx->ps[idx] = cpu_to_le32(val);
break;
case ATOM_ARG_WS:
@@ -525,6 +538,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
gctx->reg_block = val;
break;
default:
+ if (idx >= ctx->ws_size) {
+ pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
+ return;
+ }
ctx->ws[idx] = val;
}
break;
@@ -622,7 +639,7 @@ static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg)
else
SDEBUG(" table: %d\n", idx);
if (U16(ctx->ctx->cmd_table + 4 + 2 * idx))
- r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift);
+ r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift, ctx->ps_size - ctx->ps_shift);
if (r) {
ctx->abort = true;
}
@@ -739,7 +756,7 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
break;
}
if (arg != ATOM_COND_ALWAYS)
- SDEBUG(" taken: %s\n", execute ? "yes" : "no");
+ SDEBUG(" taken: %s\n", str_yes_no(execute));
SDEBUG(" target: 0x%04X\n", target);
if (execute) {
if (ctx->last_jump == (ctx->start + target)) {
@@ -1201,7 +1218,7 @@ static struct {
atom_op_div32, ATOM_ARG_WS},
};
-static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params)
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size)
{
int base = CU16(ctx->cmd_table + 4 + 2 * index);
int len, ws, ps, ptr;
@@ -1223,12 +1240,21 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
ectx.ps_shift = ps / 4;
ectx.start = base;
ectx.ps = params;
+ ectx.ps_size = params_size;
ectx.abort = false;
ectx.last_jump = 0;
- if (ws)
+ ectx.last_jump_jiffies = 0;
+ if (ws) {
ectx.ws = kcalloc(4, ws, GFP_KERNEL);
- else
+ if (!ectx.ws) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ ectx.ws_size = ws;
+ } else {
ectx.ws = NULL;
+ ectx.ws_size = 0;
+ }
debug_depth++;
while (1) {
@@ -1262,7 +1288,7 @@ free:
return ret;
}
-int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params)
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size)
{
int r;
@@ -1278,7 +1304,7 @@ int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *par
/* reset divmul */
ctx->divmul[0] = 0;
ctx->divmul[1] = 0;
- r = amdgpu_atom_execute_table_locked(ctx, index, params);
+ r = amdgpu_atom_execute_table_locked(ctx, index, params, params_size);
mutex_unlock(&ctx->mutex);
return r;
}
@@ -1299,13 +1325,209 @@ static void atom_index_iio(struct atom_context *ctx, int base)
}
}
+static void atom_get_vbios_name(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned char str_num;
+ unsigned short off_to_vbios_str;
+ unsigned char *c_ptr;
+ int name_size;
+ int i;
+
+ const char *na = "--N/A--";
+ char *back;
+
+ p_rom = ctx->bios;
+
+ str_num = *(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS);
+ if (str_num != 0) {
+ off_to_vbios_str =
+ *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ c_ptr = (unsigned char *)(p_rom + off_to_vbios_str);
+ } else {
+ /* do not know where to find name */
+ memcpy(ctx->name, na, 7);
+ ctx->name[7] = 0;
+ return;
+ }
+
+ /*
+ * skip the atombios strings, usually 4
+ * 1st is P/N, 2nd is ASIC, 3rd is PCI type, 4th is Memory type
+ */
+ for (i = 0; i < str_num; i++) {
+ while (*c_ptr != 0)
+ c_ptr++;
+ c_ptr++;
+ }
+
+ /* skip the following 2 chars: 0x0D 0x0A */
+ c_ptr += 2;
+
+ name_size = strnlen(c_ptr, STRLEN_LONG - 1);
+ memcpy(ctx->name, c_ptr, name_size);
+ back = ctx->name + name_size;
+ while ((*--back) == ' ')
+ ;
+ *(back + 1) = '\0';
+}
+
+static void atom_get_vbios_date(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned char *date_in_rom;
+
+ p_rom = ctx->bios;
+
+ date_in_rom = p_rom + OFFSET_TO_VBIOS_DATE;
+
+ ctx->date[0] = '2';
+ ctx->date[1] = '0';
+ ctx->date[2] = date_in_rom[6];
+ ctx->date[3] = date_in_rom[7];
+ ctx->date[4] = '/';
+ ctx->date[5] = date_in_rom[0];
+ ctx->date[6] = date_in_rom[1];
+ ctx->date[7] = '/';
+ ctx->date[8] = date_in_rom[3];
+ ctx->date[9] = date_in_rom[4];
+ ctx->date[10] = ' ';
+ ctx->date[11] = date_in_rom[9];
+ ctx->date[12] = date_in_rom[10];
+ ctx->date[13] = date_in_rom[11];
+ ctx->date[14] = date_in_rom[12];
+ ctx->date[15] = date_in_rom[13];
+ ctx->date[16] = '\0';
+}
+
+static unsigned char *atom_find_str_in_rom(struct atom_context *ctx, char *str, int start,
+ int end, int maxlen)
+{
+ unsigned long str_off;
+ unsigned char *p_rom;
+ unsigned short str_len;
+
+ str_off = 0;
+ str_len = strnlen(str, maxlen);
+ p_rom = ctx->bios;
+
+ for (; start <= end; ++start) {
+ for (str_off = 0; str_off < str_len; ++str_off) {
+ if (str[str_off] != *(p_rom + start + str_off))
+ break;
+ }
+
+ if (str_off == str_len || str[str_off] == 0)
+ return p_rom + start;
+ }
+ return NULL;
+}
+
+static void atom_get_vbios_pn(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned short off_to_vbios_str;
+ unsigned char *vbios_str;
+ int count;
+
+ off_to_vbios_str = 0;
+ p_rom = ctx->bios;
+
+ if (*(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS) != 0) {
+ off_to_vbios_str =
+ *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ vbios_str = (unsigned char *)(p_rom + off_to_vbios_str);
+ } else {
+ vbios_str = p_rom + OFFSET_TO_VBIOS_PART_NUMBER;
+ }
+
+ if (*vbios_str == 0) {
+ vbios_str = atom_find_str_in_rom(ctx, BIOS_ATOM_PREFIX, 3, 1024, 64);
+ if (vbios_str == NULL)
+ vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1;
+ }
+ OPTIMIZER_HIDE_VAR(vbios_str);
+ if (vbios_str != NULL && *vbios_str == 0)
+ vbios_str++;
+
+ if (vbios_str != NULL) {
+ count = 0;
+ while ((count < BIOS_STRING_LENGTH) && vbios_str[count] >= ' ' &&
+ vbios_str[count] <= 'z') {
+ ctx->vbios_pn[count] = vbios_str[count];
+ count++;
+ }
+
+ ctx->vbios_pn[count] = 0;
+ }
+
+ pr_info("ATOM BIOS: %s\n", ctx->vbios_pn);
+}
+
+static void atom_get_vbios_version(struct atom_context *ctx)
+{
+ unsigned short start = 3, end;
+ unsigned char *vbios_ver;
+ unsigned char *p_rom;
+
+ p_rom = ctx->bios;
+ /* Search from strings offset if it's present */
+ start = *(unsigned short *)(p_rom +
+ OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ /* Search till atom rom header start point */
+ end = *(unsigned short *)(p_rom + OFFSET_TO_ATOM_ROM_HEADER_POINTER);
+
+ /* Use hardcoded offsets, if the offsets are not populated */
+ if (end <= start) {
+ start = 3;
+ end = 1024;
+ }
+
+ /* find anchor ATOMBIOSBK-AMD */
+ vbios_ver =
+ atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, start, end, 64);
+ if (vbios_ver != NULL) {
+ /* skip ATOMBIOSBK-AMD VER */
+ vbios_ver += 18;
+ memcpy(ctx->vbios_ver_str, vbios_ver, STRLEN_NORMAL);
+ } else {
+ ctx->vbios_ver_str[0] = '\0';
+ }
+}
+
+static void atom_get_vbios_build(struct atom_context *ctx)
+{
+ unsigned char *atom_rom_hdr;
+ unsigned char *str;
+ uint16_t base, len;
+
+ base = CU16(ATOM_ROM_TABLE_PTR);
+ atom_rom_hdr = CSTR(base);
+
+ str = CSTR(CU16(base + ATOM_ROM_CFG_PTR));
+ /* Skip config string */
+ while (str < atom_rom_hdr && *str++)
+ ;
+ /* Skip change list string */
+ while (str < atom_rom_hdr && *str++)
+ ;
+
+ len = min(atom_rom_hdr - str, STRLEN_NORMAL);
+ if (len)
+ strscpy(ctx->build_num, str, len);
+}
+
struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
{
int base;
struct atom_context *ctx =
kzalloc(sizeof(struct atom_context), GFP_KERNEL);
- char *str;
- u16 idx;
+ struct _ATOM_ROM_HEADER *atom_rom_header;
+ struct _ATOM_MASTER_DATA_TABLE *master_table;
+ struct _ATOM_FIRMWARE_INFO *atom_fw_info;
if (!ctx)
return NULL;
@@ -1343,16 +1565,22 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
return NULL;
}
- idx = CU16(ATOM_ROM_PART_NUMBER_PTR);
- if (idx == 0)
- idx = 0x80;
-
- str = CSTR(idx);
- if (*str != '\0') {
- pr_info("ATOM BIOS: %s\n", str);
- strlcpy(ctx->vbios_version, str, sizeof(ctx->vbios_version));
+ atom_rom_header = (struct _ATOM_ROM_HEADER *)CSTR(base);
+ if (atom_rom_header->usMasterDataTableOffset != 0) {
+ master_table = (struct _ATOM_MASTER_DATA_TABLE *)
+ CSTR(atom_rom_header->usMasterDataTableOffset);
+ if (master_table->ListOfDataTables.FirmwareInfo != 0) {
+ atom_fw_info = (struct _ATOM_FIRMWARE_INFO *)
+ CSTR(master_table->ListOfDataTables.FirmwareInfo);
+ ctx->version = atom_fw_info->ulFirmwareRevision;
+ }
}
+ atom_get_vbios_name(ctx);
+ atom_get_vbios_pn(ctx);
+ atom_get_vbios_date(ctx);
+ atom_get_vbios_version(ctx);
+ atom_get_vbios_build(ctx);
return ctx;
}
@@ -1372,7 +1600,7 @@ int amdgpu_atom_asic_init(struct atom_context *ctx)
if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT))
return 1;
- ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps);
+ ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps, 16);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
index 4205bbe5d8d7..825ff28731f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -33,11 +33,11 @@ struct drm_device;
#define ATOM_ATI_MAGIC_PTR 0x30
#define ATOM_ATI_MAGIC " 761295520"
#define ATOM_ROM_TABLE_PTR 0x48
-#define ATOM_ROM_PART_NUMBER_PTR 0x6E
#define ATOM_ROM_MAGIC "ATOM"
#define ATOM_ROM_MAGIC_PTR 4
+#define ATOM_ROM_CFG_PTR 0xC
#define ATOM_ROM_MSG_PTR 0x10
#define ATOM_ROM_CMD_PTR 0x1E
#define ATOM_ROM_DATA_PTR 0x20
@@ -112,16 +112,21 @@ struct drm_device;
#define ATOM_IO_SYSIO 2
#define ATOM_IO_IIO 0x80
+#define STRLEN_NORMAL 32
+#define STRLEN_LONG 64
+#define STRLEN_VERYLONG 254
+
struct card_info {
struct drm_device *dev;
- void (* reg_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */
- uint32_t (* reg_read)(struct card_info *, uint32_t); /* filled by driver */
- void (* ioreg_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */
- uint32_t (* ioreg_read)(struct card_info *, uint32_t); /* filled by driver */
- void (* mc_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */
- uint32_t (* mc_read)(struct card_info *, uint32_t); /* filled by driver */
- void (* pll_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */
- uint32_t (* pll_read)(struct card_info *, uint32_t); /* filled by driver */
+ void (*reg_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*reg_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+ void (*mc_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*mc_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+ void (*pll_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*pll_read)(struct card_info *info, uint32_t reg); /* filled by driver */
};
struct atom_context {
@@ -141,15 +146,21 @@ struct atom_context {
int io_mode;
uint32_t *scratch;
int scratch_size_bytes;
- char vbios_version[20];
+
+ uint8_t name[STRLEN_LONG];
+ uint8_t vbios_pn[STRLEN_LONG];
+ uint32_t version;
+ uint8_t vbios_ver_str[STRLEN_NORMAL];
+ uint8_t date[STRLEN_NORMAL];
+ uint8_t build_num[STRLEN_NORMAL];
};
extern int amdgpu_atom_debug;
-struct atom_context *amdgpu_atom_parse(struct card_info *, void *);
-int amdgpu_atom_execute_table(struct atom_context *, int, uint32_t *);
-int amdgpu_atom_asic_init(struct atom_context *);
-void amdgpu_atom_destroy(struct atom_context *);
+struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
+int amdgpu_atom_asic_init(struct atom_context *ctx);
+void amdgpu_atom_destroy(struct atom_context *ctx);
bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size,
uint8_t *frev, uint8_t *crev, uint16_t *data_start);
bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index,
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 159a2a4385a1..3dfc28840a7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -24,7 +24,6 @@
* Alex Deucher
*/
-#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_fixed.h>
#include "amdgpu.h"
@@ -78,7 +77,7 @@ void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc,
args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border);
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
@@ -107,7 +106,7 @@ void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
args.ucEnable = ATOM_SCALER_DISABLE;
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
@@ -124,7 +123,7 @@ void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucEnable = lock;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
@@ -140,7 +139,7 @@ void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucEnable = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
@@ -156,7 +155,7 @@ void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucBlanking = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
@@ -172,7 +171,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
args.ucDispPipeId = amdgpu_crtc->crtc_id;
args.ucEnable = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
@@ -184,7 +183,7 @@ void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
args.ucEnable = ATOM_INIT;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
@@ -229,7 +228,7 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
args.susModeMiscInfo.usAccess = cpu_to_le16(misc);
args.ucCRTC = amdgpu_crtc->crtc_id;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union atom_enable_ss {
@@ -294,7 +293,7 @@ static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev,
args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
args.v3.ucEnable = enable;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union adjust_pixel_clock {
@@ -396,7 +395,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
ADJUST_DISPLAY_CONFIG_SS_ENABLE;
amdgpu_atom_execute_table(adev->mode_info.atom_context,
- index, (uint32_t *)&args);
+ index, (uint32_t *)&args, sizeof(args));
adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10;
break;
case 3:
@@ -429,7 +428,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
args.v3.sInput.ucExtTransmitterID = 0;
amdgpu_atom_execute_table(adev->mode_info.atom_context,
- index, (uint32_t *)&args);
+ index, (uint32_t *)&args, sizeof(args));
adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
if (args.v3.sOutput.ucRefDiv) {
amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
@@ -515,7 +514,7 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
DRM_ERROR("Unknown table version %d %d\n", frev, crev);
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union set_dce_clock {
@@ -545,7 +544,7 @@ u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */
args.v2_1.asParam.ucDCEClkType = clk_type;
args.v2_1.asParam.ucDCEClkSrc = clk_src;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10;
break;
default:
@@ -741,7 +740,7 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc,
@@ -851,7 +850,7 @@ void amdgpu_atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode
pll->reference_div = amdgpu_crtc->pll_reference_div;
pll->post_div = amdgpu_crtc->pll_post_div;
- amdgpu_pll_compute(pll, amdgpu_crtc->adjusted_clock, &pll_clock,
+ amdgpu_pll_compute(adev, pll, amdgpu_crtc->adjusted_clock, &pll_clock,
&fb_div, &frac_fb_div, &ref_div, &post_div);
amdgpu_atombios_crtc_program_ss(adev, ATOM_DISABLE, amdgpu_crtc->pll_id,
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index a3ba9ca11e98..492813ab1b54 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -26,6 +26,8 @@
*/
#include <drm/amdgpu_drm.h>
+#include <drm/display/drm_dp_helper.h>
+
#include "amdgpu.h"
#include "atom.h"
@@ -34,7 +36,6 @@
#include "atombios_dp.h"
#include "amdgpu_connectors.h"
#include "amdgpu_atombios.h"
-#include <drm/drm_dp_helper.h>
/* move these to drm_dp_helper.c/h */
#define DP_LINK_CONFIGURATION_SIZE 9
@@ -82,7 +83,7 @@ static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan,
args.v2.ucDelay = delay / 10;
args.v2.ucHPD_ID = chan->rec.hpd;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
*ack = args.v2.ucReplyStatus;
@@ -188,6 +189,8 @@ void amdgpu_atombios_dp_aux_init(struct amdgpu_connector *amdgpu_connector)
{
amdgpu_connector->ddc_bus->rec.hpd = amdgpu_connector->hpd.hpd;
amdgpu_connector->ddc_bus->aux.transfer = amdgpu_atombios_dp_aux_transfer;
+ amdgpu_connector->ddc_bus->aux.drm_dev = amdgpu_connector->base.dev;
+
drm_dp_aux_init(&amdgpu_connector->ddc_bus->aux);
amdgpu_connector->ddc_bus->has_aux = true;
}
@@ -298,7 +301,7 @@ static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev,
args.ucLaneNum = lane_num;
args.ucStatus = 0;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
return args.ucStatus;
}
@@ -427,7 +430,7 @@ void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
}
int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *dig_connector;
@@ -455,8 +458,8 @@ bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connect
u8 link_status[DP_LINK_STATUS_SIZE];
struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
- if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, link_status)
- <= 0)
+ if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux,
+ link_status) < 0)
return false;
if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
return false;
@@ -610,10 +613,10 @@ amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_i
dp_info->tries = 0;
voltage = 0xff;
while (1) {
- drm_dp_link_train_clock_recovery_delay(dp_info->dpcd);
+ drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd);
if (drm_dp_dpcd_read_link_status(dp_info->aux,
- dp_info->link_status) <= 0) {
+ dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
@@ -675,10 +678,10 @@ amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_i
dp_info->tries = 0;
channel_eq = false;
while (1) {
- drm_dp_link_train_channel_eq_delay(dp_info->dpcd);
+ drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd);
if (drm_dp_dpcd_read_link_status(dp_info->aux,
- dp_info->link_status) <= 0) {
+ dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
index f59d85eaddf0..3e24acf8133f 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
@@ -32,7 +32,7 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
const struct drm_display_mode *mode);
int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
- struct drm_display_mode *mode);
+ const struct drm_display_mode *mode);
bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector);
void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector,
u8 power_state);
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 6134ed964027..a51f3414b65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -26,7 +26,9 @@
#include <linux/pci.h>
-#include <drm/drm_crtc_helper.h>
+#include <acpi/video.h>
+
+#include <drm/drm_edid.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
@@ -118,8 +120,6 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
}
}
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd)
{
u8 level;
@@ -184,7 +184,12 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode
return;
if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
- return;
+ goto register_acpi_backlight;
+
+ if (!acpi_video_backlight_use_native()) {
+ drm_info(dev, "Skipping amdgpu atom DIG backlight registration\n");
+ goto register_acpi_backlight;
+ }
pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), GFP_KERNEL);
if (!pdata) {
@@ -210,7 +215,7 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode
dig->bl_dev = bd;
bd->props.brightness = amdgpu_atombios_encoder_get_backlight_brightness(bd);
- bd->props.power = FB_BLANK_UNBLANK;
+ bd->props.power = BACKLIGHT_POWER_ON;
backlight_update_status(bd);
DRM_INFO("amdgpu atom DIG backlight initialized\n");
@@ -220,6 +225,10 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode
error:
kfree(pdata);
return;
+
+register_acpi_backlight:
+ /* Try registering an ACPI video backlight device instead. */
+ acpi_video_register_backlight();
}
void
@@ -251,18 +260,6 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder)
}
}
-#else /* !CONFIG_BACKLIGHT_CLASS_DEVICE */
-
-void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *encoder)
-{
-}
-
-void amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *encoder)
-{
-}
-
-#endif
-
bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder)
{
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -338,7 +335,7 @@ amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action)
args.ucDacStandard = ATOM_DAC1_PS2;
args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
@@ -435,7 +432,7 @@ amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action)
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
@@ -469,7 +466,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
if (amdgpu_connector->use_digital &&
(amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE))
return ATOM_ENCODER_MODE_HDMI;
- else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
+ else if (connector->display_info.is_hdmi &&
(amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
return ATOM_ENCODER_MODE_HDMI;
else if (amdgpu_connector->use_digital)
@@ -488,7 +485,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
if (amdgpu_audio != 0) {
if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)
return ATOM_ENCODER_MODE_HDMI;
- else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
+ else if (connector->display_info.is_hdmi &&
(amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
return ATOM_ENCODER_MODE_HDMI;
else
@@ -506,7 +503,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
} else if (amdgpu_audio != 0) {
if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)
return ATOM_ENCODER_MODE_HDMI;
- else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
+ else if (connector->display_info.is_hdmi &&
(amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
return ATOM_ENCODER_MODE_HDMI;
else
@@ -735,7 +732,7 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
@@ -765,7 +762,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
int dp_clock = 0;
int dp_lane_count = 0;
int connector_object_id = 0;
- int igp_lane_info = 0;
int dig_encoder = dig->dig_encoder;
int hpd_id = AMDGPU_HPD_NONE;
@@ -848,26 +844,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
else
args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
- if ((adev->flags & AMD_IS_APU) &&
- (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY)) {
- if (is_dp ||
- !amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) {
- if (igp_lane_info & 0x1)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_3;
- else if (igp_lane_info & 0x2)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_4_7;
- else if (igp_lane_info & 0x4)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_11;
- else if (igp_lane_info & 0x8)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15;
- } else {
- if (igp_lane_info & 0x3)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_7;
- else if (igp_lane_info & 0xc)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_15;
- }
- }
-
if (dig->linkb)
args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB;
else
@@ -1160,7 +1136,7 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
bool
@@ -1188,7 +1164,7 @@ amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector,
args.v1.ucAction = action;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
/* wait for the panel to power up */
if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) {
@@ -1312,7 +1288,7 @@ amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder,
DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
static void
@@ -1657,7 +1633,7 @@ amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder)
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
/* This only needs to be called once at startup */
@@ -1730,7 +1706,7 @@ amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder,
args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
return true;
} else
@@ -2088,24 +2064,25 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder)
case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record;
if (fake_edid_record->ucFakeEDIDLength) {
- struct edid *edid;
- int edid_size =
- max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength);
- edid = kmalloc(edid_size, GFP_KERNEL);
- if (edid) {
- memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0],
- fake_edid_record->ucFakeEDIDLength);
-
- if (drm_edid_is_valid(edid)) {
- adev->mode_info.bios_hardcoded_edid = edid;
- adev->mode_info.bios_hardcoded_edid_size = edid_size;
- } else
- kfree(edid);
- }
+ const struct drm_edid *edid;
+ int edid_size;
+
+ if (fake_edid_record->ucFakeEDIDLength == 128)
+ edid_size = fake_edid_record->ucFakeEDIDLength;
+ else
+ edid_size = fake_edid_record->ucFakeEDIDLength * 128;
+ edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size);
+ if (drm_edid_valid(edid))
+ adev->mode_info.bios_hardcoded_edid = edid;
+ else
+ drm_edid_free(edid);
+ record += struct_size(fake_edid_record,
+ ucFakeEDIDString,
+ edid_size);
+ } else {
+ /* empty fake edid record must be 3 bytes long */
+ record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
}
- record += fake_edid_record->ucFakeEDIDLength ?
- fake_edid_record->ucFakeEDIDLength + 2 :
- sizeof(ATOM_FAKE_EDID_PATCH_RECORD);
break;
case LCD_PANEL_RESOLUTION_RECORD_TYPE:
panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index af0335535f82..a6501114322f 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -86,7 +86,7 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
args.ucSlaveAddr = slave_addr << 1;
args.ucLineNumber = chan->rec.i2c_id;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
/* error */
if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) {
@@ -172,5 +172,5 @@ void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr
args.ucSlaveAddr = slave_addr;
args.ucLineNumber = line_number;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 4d6832cc7fb0..9cd63b4177bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -26,6 +26,8 @@
#include <linux/module.h>
#include <linux/pci.h>
+#include <drm/amdgpu_drm.h>
+
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@@ -68,7 +70,81 @@
#include "amdgpu_dm.h"
#include "amdgpu_amdkfd.h"
-#include "dce_virtual.h"
+#include "amdgpu_vkms.h"
+
+static const struct amdgpu_video_codec_info cik_video_codecs_encode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 0,
+ },
+};
+
+static const struct amdgpu_video_codecs cik_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(cik_video_codecs_encode_array),
+ .codec_array = cik_video_codecs_encode_array,
+};
+
+static const struct amdgpu_video_codec_info cik_video_codecs_decode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 3,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 5,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 41,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 4,
+ },
+};
+
+static const struct amdgpu_video_codecs cik_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(cik_video_codecs_decode_array),
+ .codec_array = cik_video_codecs_decode_array,
+};
+
+static int cik_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ if (encode)
+ *codecs = &cik_video_codecs_encode;
+ else
+ *codecs = &cik_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
/*
* Indirect registers accessor
@@ -1065,12 +1141,12 @@ static uint32_t cik_get_register_value(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
} else {
@@ -1299,14 +1375,14 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool cik_asic_supports_baco(struct amdgpu_device *adev)
+static int cik_asic_supports_baco(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_BONAIRE:
case CHIP_HAWAII:
return amdgpu_dpm_is_baco_supported(adev);
default:
- return false;
+ return 0;
}
}
@@ -1352,6 +1428,10 @@ static int cik_asic_reset(struct amdgpu_device *adev)
{
int r;
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
dev_info(adev->dev, "BACO reset\n");
r = amdgpu_dpm_baco_reset(adev);
@@ -1494,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
- &bridge_cfg);
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
- &gpu_cfg);
-
- tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
-
- tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
- tmp16);
+ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1557,45 +1628,28 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
msleep(100);
/* linkctl */
- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
- &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
- tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pcie_capability_write_word(root, PCI_EXP_LNKCTL,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL,
- &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
- tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL,
- tmp16);
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ bridge_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ gpu_cfg &
+ PCI_EXP_LNKCTL_HAWD);
/* linkctl2 */
- pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (bridge_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(root,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (gpu_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- tmp16);
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1610,16 +1664,15 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
-
+ tmp16 = 0;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS, tmp16);
speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
@@ -1639,16 +1692,12 @@ static void cik_program_aspm(struct amdgpu_device *adev)
bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
bool disable_clkreq = false;
- if (amdgpu_aspm == 0)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
if (pci_is_root_bus(adev->pdev->bus))
return;
- /* XXX double check APUs */
- if (adev->flags & AMD_IS_APU)
- return;
-
orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) |
@@ -1933,11 +1982,12 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.get_pcie_replay_count = &cik_get_pcie_replay_count,
.supports_baco = &cik_asic_supports_baco,
.pre_asic_init = &cik_pre_asic_init,
+ .query_video_codecs = &cik_query_video_codecs,
};
-static int cik_common_early_init(void *handle)
+static int cik_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->smc_rreg = &cik_smc_rreg;
adev->smc_wreg = &cik_smc_wreg;
@@ -2074,19 +2124,9 @@ static int cik_common_early_init(void *handle)
return 0;
}
-static int cik_common_sw_init(void *handle)
+static int cik_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
-
-static int cik_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int cik_common_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* move the golden regs per IP block */
cik_init_golden_registers(adev);
@@ -2098,48 +2138,36 @@ static int cik_common_hw_init(void *handle)
return 0;
}
-static int cik_common_hw_fini(void *handle)
+static int cik_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int cik_common_suspend(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_common_hw_fini(adev);
-}
-
-static int cik_common_resume(void *handle)
+static int cik_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_common_hw_init(adev);
+ return cik_common_hw_init(ip_block);
}
-static bool cik_common_is_idle(void *handle)
+static bool cik_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int cik_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-static int cik_common_soft_reset(void *handle)
+
+static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX hard reset?? */
return 0;
}
-static int cik_common_set_clockgating_state(void *handle,
+static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_common_set_powergating_state(void *handle,
+static int cik_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2148,15 +2176,10 @@ static int cik_common_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_common_ip_funcs = {
.name = "cik_common",
.early_init = cik_common_early_init,
- .late_init = NULL,
- .sw_init = cik_common_sw_init,
- .sw_fini = cik_common_sw_fini,
.hw_init = cik_common_hw_init,
.hw_fini = cik_common_hw_fini,
- .suspend = cik_common_suspend,
.resume = cik_common_resume,
.is_idle = cik_common_is_idle,
- .wait_for_idle = cik_common_wait_for_idle,
.soft_reset = cik_common_soft_reset,
.set_clockgating_state = cik_common_set_clockgating_state,
.set_powergating_state = cik_common_set_powergating_state,
@@ -2182,7 +2205,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
@@ -2200,7 +2223,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
@@ -2218,7 +2241,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
if (adev->enable_virtual_display)
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
@@ -2238,7 +2261,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
if (adev->enable_virtual_display)
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index d3745711d55f..876a3256dba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -156,6 +156,9 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
/* enable irqs */
cik_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -192,6 +195,9 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
/* When a ring buffer overflow happen start parsing interrupt
@@ -204,7 +210,15 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(mmIH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
}
+
+out:
return (wptr & ih->ptr_mask);
}
@@ -277,9 +291,9 @@ static void cik_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int cik_ih_early_init(void *handle)
+static int cik_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -291,64 +305,61 @@ static int cik_ih_early_init(void *handle)
return 0;
}
-static int cik_ih_sw_init(void *handle)
+static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
}
-static int cik_ih_sw_fini(void *handle)
+static int cik_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
return 0;
}
-static int cik_ih_hw_init(void *handle)
+static int cik_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return cik_ih_irq_init(adev);
}
-static int cik_ih_hw_fini(void *handle)
+static int cik_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cik_ih_irq_disable(adev);
+ cik_ih_irq_disable(ip_block->adev);
return 0;
}
-static int cik_ih_suspend(void *handle)
+static int cik_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_ih_hw_fini(adev);
+ return cik_ih_hw_fini(ip_block);
}
-static int cik_ih_resume(void *handle)
+static int cik_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_ih_hw_init(adev);
+ return cik_ih_hw_init(ip_block);
}
-static bool cik_ih_is_idle(void *handle)
+static bool cik_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -357,11 +368,11 @@ static bool cik_ih_is_idle(void *handle)
return true;
}
-static int cik_ih_wait_for_idle(void *handle)
+static int cik_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -373,9 +384,9 @@ static int cik_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int cik_ih_soft_reset(void *handle)
+static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -403,13 +414,13 @@ static int cik_ih_soft_reset(void *handle)
return 0;
}
-static int cik_ih_set_clockgating_state(void *handle,
+static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_ih_set_powergating_state(void *handle,
+static int cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -418,7 +429,6 @@ static int cik_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_ih_ip_funcs = {
.name = "cik_ih",
.early_init = cik_ih_early_init,
- .late_init = NULL,
.sw_init = cik_ih_sw_init,
.sw_fini = cik_ih_sw_fini,
.hw_init = cik_ih_hw_init,
@@ -443,8 +453,7 @@ static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &cik_ih_funcs;
}
-const struct amdgpu_ip_block_version cik_ih_ip_block =
-{
+const struct amdgpu_ip_block_version cik_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 2,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 43b978144b79..9e8715b4739d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -54,7 +54,9 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
-static int cik_sdma_soft_reset(void *handle);
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block);
+
+u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
@@ -67,16 +69,12 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
-u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
-
-
static void cik_sdma_free_microcode(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
/*
@@ -108,7 +106,6 @@ static void cik_sdma_free_microcode(struct amdgpu_device *adev)
static int cik_sdma_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
DRM_DEBUG("\n");
@@ -134,21 +131,22 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
}
out:
if (err) {
- pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ pr_err("cik_sdma: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
return err;
}
@@ -164,7 +162,7 @@ static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
{
u32 rptr;
- rptr = ring->adev->wb.wb[ring->rptr_offs];
+ rptr = *ring->rptr_cpu_addr;
return (rptr & 0x3fffc) >> 2;
}
@@ -195,7 +193,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
- (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+ (ring->wptr << 2) & 0x3fffc);
}
static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -309,15 +307,9 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
*/
static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK;
@@ -436,12 +428,10 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
- u32 wb_offset;
int i, j, r;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@@ -477,9 +467,9 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
- ((adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+ ((ring->rptr_gpu_addr) & 0xFFFFFFFC));
rb_cntl |= SDMA0_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;
@@ -487,7 +477,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
/* enable DMA RB */
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
@@ -499,8 +489,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
cik_sdma_enable(adev, true);
@@ -510,9 +498,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -712,7 +697,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -720,7 +705,7 @@ err0:
}
/**
- * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART
+ * cik_sdma_vm_copy_pte - update PTEs by copying them from the GART
*
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
@@ -746,7 +731,7 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
}
/**
- * cik_sdma_vm_write_pages - update PTEs by writing them manually
+ * cik_sdma_vm_write_pte - update PTEs by writing them manually
*
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
@@ -775,7 +760,7 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
}
/**
- * cik_sdma_vm_set_pages - update the page tables using sDMA
+ * cik_sdma_vm_set_pte_pde - update the page tables using sDMA
*
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
@@ -804,7 +789,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
}
/**
- * cik_sdma_vm_pad_ib - pad the IB to the required number of dw
+ * cik_sdma_ring_pad_ib - pad the IB to the required number of dw
*
* @ring: amdgpu_ring structure holding ring information
* @ib: indirect buffer to fill with padding
@@ -934,12 +919,17 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
}
}
-static int cik_sdma_early_init(void *handle)
+static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+ r = cik_sdma_init_microcode(adev);
+ if (r)
+ return r;
+
cik_sdma_set_ring_funcs(adev);
cik_sdma_set_irq_funcs(adev);
cik_sdma_set_buffer_funcs(adev);
@@ -948,18 +938,12 @@ static int cik_sdma_early_init(void *handle)
return 0;
}
-static int cik_sdma_sw_init(void *handle)
+static int cik_sdma_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, i;
- r = cik_sdma_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
&adev->sdma.trap_irq);
@@ -984,10 +968,9 @@ static int cik_sdma_sw_init(void *handle)
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
- (i == 0) ?
- AMDGPU_SDMA_IRQ_INSTANCE0 :
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
AMDGPU_SDMA_IRQ_INSTANCE1,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -995,9 +978,9 @@ static int cik_sdma_sw_init(void *handle)
return r;
}
-static int cik_sdma_sw_fini(void *handle)
+static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -1007,21 +990,16 @@ static int cik_sdma_sw_fini(void *handle)
return 0;
}
-static int cik_sdma_hw_init(void *handle)
+static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = cik_sdma_start(adev);
- if (r)
- return r;
-
- return r;
+ return cik_sdma_start(adev);
}
-static int cik_sdma_hw_fini(void *handle)
+static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cik_ctx_switch_enable(adev, false);
cik_sdma_enable(adev, false);
@@ -1029,25 +1007,21 @@ static int cik_sdma_hw_fini(void *handle)
return 0;
}
-static int cik_sdma_suspend(void *handle)
+static int cik_sdma_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_sdma_hw_fini(adev);
+ return cik_sdma_hw_fini(ip_block);
}
-static int cik_sdma_resume(void *handle)
+static int cik_sdma_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ cik_sdma_soft_reset(ip_block);
- cik_sdma_soft_reset(handle);
-
- return cik_sdma_hw_init(adev);
+ return cik_sdma_hw_init(ip_block);
}
-static bool cik_sdma_is_idle(void *handle)
+static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1057,27 +1031,23 @@ static bool cik_sdma_is_idle(void *handle)
return true;
}
-static int cik_sdma_wait_for_idle(void *handle)
+static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
- SRBM_STATUS2__SDMA1_BUSY_MASK);
-
- if (!tmp)
+ if (cik_sdma_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int cik_sdma_soft_reset(void *handle)
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp;
/* sdma0 */
@@ -1211,11 +1181,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int cik_sdma_set_clockgating_state(void *handle,
+static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1226,7 +1196,7 @@ static int cik_sdma_set_clockgating_state(void *handle,
return 0;
}
-static int cik_sdma_set_powergating_state(void *handle,
+static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1235,7 +1205,6 @@ static int cik_sdma_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_sdma_ip_funcs = {
.name = "cik_sdma",
.early_init = cik_sdma_early_init,
- .late_init = NULL,
.sw_init = cik_sdma_sw_init,
.sw_fini = cik_sdma_sw_fini,
.hw_init = cik_sdma_hw_init,
@@ -1309,7 +1278,7 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: is this a secure operation
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (CIK).
* Used by the amdgpu ttm implementation to move pages if
@@ -1319,7 +1288,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
ib->ptr[ib->length_dw++] = byte_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 55982c0064b5..8aca4f2734f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -51,8 +51,14 @@
#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
-#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
-#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
#define PIPEID(x) ((x) << 0)
#define MEID(x) ((x) << 2)
@@ -364,6 +370,7 @@
* 1 - Stream
* 2 - Bypass
*/
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard
* 1 - send low 32bit data
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
new file mode 100644
index 000000000000..a8b29d33c464
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
@@ -0,0 +1,997 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CLEARSTATE_GFX11_H_
+#define __CLEARSTATE_GFX11_H_
+
+static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_SIZE_XY
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_RESERVED_REG_2
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_RESERVED_REG_1
+ 0x00000000, // DB_RESERVED_REG_3
+ 0x00000000, // DB_SPI_VRS_CENTER_LOCATION
+ 0, // HOLE
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00150055, // DB_RMI_L2_CACHE_CONTROL
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+ 0x00000000, // PA_SC_RASTER_CONFIG
+ 0x00000000, // PA_SC_RASTER_CONFIG_1
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_PIPEID
+ 0x00000000, // CP_VMID
+ 0x00000000, // CONTEXT_RESERVED_REG0
+ 0x00000000, // CONTEXT_RESERVED_REG1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_FSR_EN
+ 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X
+ 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y
+ 0, // HOLE
+ 0x00000000, // PA_SC_VRS_OVERRIDE_CNTL
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY
+ 0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
+ 0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_VRS_RATE_BASE
+ 0x00000000, // PA_SC_VRS_RATE_BASE_EXT
+ 0x00000000, // PA_SC_VRS_RATE_SIZE_XY
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x00550055, // CB_RMI_GL2_CACHE_CONTROL
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_FDCC_CONTROL
+ 0x00000000, // CB_COVERAGE_OUT_CONTROL
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z
+ 0x00000000, // PA_RATE_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0x00000000, // SPI_BARYC_SSAA_CNTL
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_LO
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_IDX_FORMAT
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT_CONTROL
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // GE_MAX_OUTPUT_PER_SUBGROUP
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0x00000000, // PA_STEREO_CNTL
+ 0x00000000, // PA_STATE_STEREO_X
+ 0x00000000, // PA_CL_VRS_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0, // HOLE
+ 0x00000000, // VGT_REUSE_OFF
+ 0, // HOLE
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // GE_NGG_SUBGRP_CNTL
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0, // HOLE
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0x00000000, // PA_SC_BINNER_CNTL_2
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR0_ATTRIB3
+ 0x00000000, // CB_COLOR1_ATTRIB3
+ 0x00000000, // CB_COLOR2_ATTRIB3
+ 0x00000000, // CB_COLOR3_ATTRIB3
+ 0x00000000, // CB_COLOR4_ATTRIB3
+ 0x00000000, // CB_COLOR5_ATTRIB3
+ 0x00000000, // CB_COLOR6_ATTRIB3
+ 0x00000000, // CB_COLOR7_ATTRIB3
+};
+static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] =
+{
+ {gfx11_SECT_CONTEXT_def_1, 0x0000a000, 215 },
+ {gfx11_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {gfx11_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
+ {gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx11_cs_data[] = {
+ { gfx11_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
+
+#endif /* __CLEARSTATE_GFX11_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h
new file mode 100644
index 000000000000..2f6c9d11d5ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CLEARSTATE_GFX12_H_
+#define __CLEARSTATE_GFX12_H_
+
+static const unsigned int gfx12_SECT_CONTEXT_def_1[] = {
+0x00000000, //mmSC_MEM_TEMPORAL
+0x00000000, //mmSC_MEM_SPEC_READ
+0x00000000, //mmPA_SC_VPORT_0_TL
+0x00000000, //mmPA_SC_VPORT_0_BR
+0x00000000, //mmPA_SC_VPORT_1_TL
+0x00000000, //mmPA_SC_VPORT_1_BR
+0x00000000, //mmPA_SC_VPORT_2_TL
+0x00000000, //mmPA_SC_VPORT_2_BR
+0x00000000, //mmPA_SC_VPORT_3_TL
+0x00000000, //mmPA_SC_VPORT_3_BR
+0x00000000, //mmPA_SC_VPORT_4_TL
+0x00000000, //mmPA_SC_VPORT_4_BR
+0x00000000, //mmPA_SC_VPORT_5_TL
+0x00000000, //mmPA_SC_VPORT_5_BR
+0x00000000, //mmPA_SC_VPORT_6_TL
+0x00000000, //mmPA_SC_VPORT_6_BR
+0x00000000, //mmPA_SC_VPORT_7_TL
+0x00000000, //mmPA_SC_VPORT_7_BR
+0x00000000, //mmPA_SC_VPORT_8_TL
+0x00000000, //mmPA_SC_VPORT_8_BR
+0x00000000, //mmPA_SC_VPORT_9_TL
+0x00000000, //mmPA_SC_VPORT_9_BR
+0x00000000, //mmPA_SC_VPORT_10_TL
+0x00000000, //mmPA_SC_VPORT_10_BR
+0x00000000, //mmPA_SC_VPORT_11_TL
+0x00000000, //mmPA_SC_VPORT_11_BR
+0x00000000, //mmPA_SC_VPORT_12_TL
+0x00000000, //mmPA_SC_VPORT_12_BR
+0x00000000, //mmPA_SC_VPORT_13_TL
+0x00000000, //mmPA_SC_VPORT_13_BR
+0x00000000, //mmPA_SC_VPORT_14_TL
+0x00000000, //mmPA_SC_VPORT_14_BR
+0x00000000, //mmPA_SC_VPORT_15_TL
+0x00000000, //mmPA_SC_VPORT_15_BR
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_2[] = {
+0x00000000, //mmPA_CL_PROG_NEAR_CLIP_Z
+0x00000000, //mmPA_RATE_CNTL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_3[] = {
+0x00000000, //mmCP_PERFMON_CNTX_CNTL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_4[] = {
+0x00000000, //mmCONTEXT_RESERVED_REG0
+0x00000000, //mmCONTEXT_RESERVED_REG1
+0x00000000, //mmPA_SC_CLIPRECT_0_EXT
+0x00000000, //mmPA_SC_CLIPRECT_1_EXT
+0x00000000, //mmPA_SC_CLIPRECT_2_EXT
+0x00000000, //mmPA_SC_CLIPRECT_3_EXT
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_5[] = {
+0x00000000, //mmPA_SC_HIZ_INFO
+0x00000000, //mmPA_SC_HIS_INFO
+0x00000000, //mmPA_SC_HIZ_BASE
+0x00000000, //mmPA_SC_HIZ_BASE_EXT
+0x00000000, //mmPA_SC_HIZ_SIZE_XY
+0x00000000, //mmPA_SC_HIS_BASE
+0x00000000, //mmPA_SC_HIS_BASE_EXT
+0x00000000, //mmPA_SC_HIS_SIZE_XY
+0x00000000, //mmPA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
+0x00000000, //mmPA_SC_BINNER_DYNAMIC_BATCH_LIMIT
+0x00000000, //mmPA_SC_HISZ_CONTROL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_6[] = {
+0x00000000, //mmCB_MEM0_INFO
+0x00000000, //mmCB_MEM1_INFO
+0x00000000, //mmCB_MEM2_INFO
+0x00000000, //mmCB_MEM3_INFO
+0x00000000, //mmCB_MEM4_INFO
+0x00000000, //mmCB_MEM5_INFO
+0x00000000, //mmCB_MEM6_INFO
+0x00000000, //mmCB_MEM7_INFO
+};
+
+static const struct cs_extent_def gfx12_SECT_CONTEXT_defs[] = {
+ {gfx12_SECT_CONTEXT_def_1, 0x0000a03e, 34 },
+ {gfx12_SECT_CONTEXT_def_2, 0x0000a0cc, 2 },
+ {gfx12_SECT_CONTEXT_def_3, 0x0000a0d8, 1 },
+ {gfx12_SECT_CONTEXT_def_4, 0x0000a0db, 6 },
+ {gfx12_SECT_CONTEXT_def_5, 0x0000a2e5, 11 },
+ {gfx12_SECT_CONTEXT_def_6, 0x0000a3c0, 8 },
+ { 0, 0, 0 }
+};
+
+static const struct cs_section_def gfx12_cs_data[] = {
+ { gfx12_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
+
+#endif /* __CLEARSTATE_GFX12_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
index 567a904804bc..9c85ca6358c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
@@ -21,8 +21,7 @@
*
*/
-static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_1[] = {
0x00000000, // DB_RENDER_CONTROL
0x00000000, // DB_COUNT_CONTROL
0x00000000, // DB_DEPTH_VIEW
@@ -236,8 +235,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
0x00000000, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
};
-static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_2[] = {
0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
0x00000000, // PA_SC_TILE_STEERING_OVERRIDE
0x00000000, // CP_PERFMON_CNTX_CNTL
@@ -521,15 +519,13 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
0x00000000, // CB_MRT6_EPITCH
0x00000000, // CB_MRT7_EPITCH
};
-static const unsigned int gfx9_SECT_CONTEXT_def_3[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_3[] = {
0x00000000, // PA_CL_POINT_X_RAD
0x00000000, // PA_CL_POINT_Y_RAD
0x00000000, // PA_CL_POINT_SIZE
0x00000000, // PA_CL_POINT_CULL_RAD
};
-static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_4[] = {
0x00000000, // DB_DEPTH_CONTROL
0x00000000, // DB_EQAA
0x00000000, // CB_COLOR_CONTROL
@@ -688,17 +684,14 @@ static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
0x00000000, // VGT_GS_OUT_PRIM_TYPE
0x00000000, // IA_ENHANCE
};
-static const unsigned int gfx9_SECT_CONTEXT_def_5[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_5[] = {
0x00000000, // WD_ENHANCE
0x00000000, // VGT_PRIMITIVEID_EN
};
-static const unsigned int gfx9_SECT_CONTEXT_def_6[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_6[] = {
0x00000000, // VGT_PRIMITIVEID_RESET
};
-static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_7[] = {
0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
0x00000000, // VGT_DRAW_PAYLOAD_CNTL
0, // HOLE
@@ -766,8 +759,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
0x00000000, // VGT_STRMOUT_CONFIG
0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
};
-static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_8[] = {
0x00000000, // PA_SC_CENTROID_PRIORITY_0
0x00000000, // PA_SC_CENTROID_PRIORITY_1
0x00001000, // PA_SC_LINE_CNTL
@@ -924,8 +916,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
0x00000000, // CB_COLOR7_DCC_BASE
0x00000000, // CB_COLOR7_DCC_BASE_EXT
};
-static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] =
-{
+static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = {
{gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 },
{gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 },
{gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
index 66e39cdb5cb0..5fd96ddd7f0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
@@ -21,8 +21,7 @@
*
*/
-static const u32 si_SECT_CONTEXT_def_1[] =
-{
+static const u32 si_SECT_CONTEXT_def_1[] = {
0x00000000, // DB_RENDER_CONTROL
0x00000000, // DB_COUNT_CONTROL
0x00000000, // DB_DEPTH_VIEW
@@ -236,8 +235,7 @@ static const u32 si_SECT_CONTEXT_def_1[] =
0x00000000, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
};
-static const u32 si_SECT_CONTEXT_def_2[] =
-{
+static const u32 si_SECT_CONTEXT_def_2[] = {
0x00000000, // CP_PERFMON_CNTX_CNTL
0x00000000, // CP_RINGID
0x00000000, // CP_VMID
@@ -511,8 +509,7 @@ static const u32 si_SECT_CONTEXT_def_2[] =
0x00000000, // CB_BLEND6_CONTROL
0x00000000, // CB_BLEND7_CONTROL
};
-static const u32 si_SECT_CONTEXT_def_3[] =
-{
+static const u32 si_SECT_CONTEXT_def_3[] = {
0x00000000, // PA_CL_POINT_X_RAD
0x00000000, // PA_CL_POINT_Y_RAD
0x00000000, // PA_CL_POINT_SIZE
@@ -520,8 +517,7 @@ static const u32 si_SECT_CONTEXT_def_3[] =
0x00000000, // VGT_DMA_BASE_HI
0x00000000, // VGT_DMA_BASE
};
-static const u32 si_SECT_CONTEXT_def_4[] =
-{
+static const u32 si_SECT_CONTEXT_def_4[] = {
0x00000000, // DB_DEPTH_CONTROL
0x00000000, // DB_EQAA
0x00000000, // CB_COLOR_CONTROL
@@ -680,16 +676,13 @@ static const u32 si_SECT_CONTEXT_def_4[] =
0x00000000, // VGT_GS_OUT_PRIM_TYPE
0x00000000, // IA_ENHANCE
};
-static const u32 si_SECT_CONTEXT_def_5[] =
-{
+static const u32 si_SECT_CONTEXT_def_5[] = {
0x00000000, // VGT_PRIMITIVEID_EN
};
-static const u32 si_SECT_CONTEXT_def_6[] =
-{
+static const u32 si_SECT_CONTEXT_def_6[] = {
0x00000000, // VGT_PRIMITIVEID_RESET
};
-static const u32 si_SECT_CONTEXT_def_7[] =
-{
+static const u32 si_SECT_CONTEXT_def_7[] = {
0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
0, // HOLE
0, // HOLE
@@ -924,8 +917,7 @@ static const u32 si_SECT_CONTEXT_def_7[] =
0x00000000, // CB_COLOR7_CLEAR_WORD0
0x00000000, // CB_COLOR7_CLEAR_WORD1
};
-static const struct cs_extent_def si_SECT_CONTEXT_defs[] =
-{
+static const struct cs_extent_def si_SECT_CONTEXT_defs[] = {
{si_SECT_CONTEXT_def_1, 0x0000a000, 212 },
{si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
{si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
new file mode 100644
index 000000000000..ed1e25661706
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "cyan_skillfish_ip_offset.h"
+
+int cyan_skillfish_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the blocke needed by driver */
+ uint32_t i;
+
+ adev->gfx.xcc_mask = 1;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 307c01301c87..bc7a2e06ab5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -157,6 +157,9 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
cz_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -194,6 +197,9 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -216,6 +222,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
out:
return (wptr & ih->ptr_mask);
@@ -269,9 +280,9 @@ static void cz_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int cz_ih_early_init(void *handle)
+static int cz_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -283,35 +294,38 @@ static int cz_ih_early_init(void *handle)
return 0;
}
-static int cz_ih_sw_init(void *handle)
+static int cz_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
}
-static int cz_ih_sw_fini(void *handle)
+static int cz_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
return 0;
}
-static int cz_ih_hw_init(void *handle)
+static int cz_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = cz_ih_irq_init(adev);
if (r)
@@ -320,32 +334,26 @@ static int cz_ih_hw_init(void *handle)
return 0;
}
-static int cz_ih_hw_fini(void *handle)
+static int cz_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cz_ih_irq_disable(adev);
+ cz_ih_irq_disable(ip_block->adev);
return 0;
}
-static int cz_ih_suspend(void *handle)
+static int cz_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cz_ih_hw_fini(adev);
+ return cz_ih_hw_fini(ip_block);
}
-static int cz_ih_resume(void *handle)
+static int cz_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cz_ih_hw_init(adev);
+ return cz_ih_hw_init(ip_block);
}
-static bool cz_ih_is_idle(void *handle)
+static bool cz_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -354,11 +362,11 @@ static bool cz_ih_is_idle(void *handle)
return true;
}
-static int cz_ih_wait_for_idle(void *handle)
+static int cz_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -370,10 +378,10 @@ static int cz_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int cz_ih_soft_reset(void *handle)
+static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -400,14 +408,14 @@ static int cz_ih_soft_reset(void *handle)
return 0;
}
-static int cz_ih_set_clockgating_state(void *handle,
+static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
// TODO
return 0;
}
-static int cz_ih_set_powergating_state(void *handle,
+static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
// TODO
@@ -417,7 +425,6 @@ static int cz_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs cz_ih_ip_funcs = {
.name = "cz_ih",
.early_init = cz_ih_early_init,
- .late_init = NULL,
.sw_init = cz_ih_sw_init,
.sw_fini = cz_ih_sw_fini,
.hw_init = cz_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 7944781e1086..72ca6538b2e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -21,7 +21,10 @@
*
*/
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -49,9 +52,9 @@
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
+static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, int hpd);
-static const u32 crtc_offsets[] =
-{
+static const u32 crtc_offsets[] = {
CRTC0_REGISTER_OFFSET,
CRTC1_REGISTER_OFFSET,
CRTC2_REGISTER_OFFSET,
@@ -61,8 +64,7 @@ static const u32 crtc_offsets[] =
CRTC6_REGISTER_OFFSET
};
-static const u32 hpd_offsets[] =
-{
+static const u32 hpd_offsets[] = {
HPD0_REGISTER_OFFSET,
HPD1_REGISTER_OFFSET,
HPD2_REGISTER_OFFSET,
@@ -119,30 +121,26 @@ static const struct {
.hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
} };
-static const u32 golden_settings_tonga_a11[] =
-{
+static const u32 golden_settings_tonga_a11[] = {
mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
mmFBC_MISC, 0x1f311fff, 0x12300000,
mmHDMI_CONTROL, 0x31000111, 0x00000011,
};
-static const u32 tonga_mgcg_cgcg_init[] =
-{
+static const u32 tonga_mgcg_cgcg_init[] = {
mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
};
-static const u32 golden_settings_fiji_a10[] =
-{
+static const u32 golden_settings_fiji_a10[] = {
mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
mmFBC_MISC, 0x1f311fff, 0x12300000,
mmHDMI_CONTROL, 0x31000111, 0x00000011,
};
-static const u32 fiji_mgcg_cgcg_init[] =
-{
+static const u32 fiji_mgcg_cgcg_init[] = {
mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
};
@@ -367,6 +365,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ dce_v10_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq,
amdgpu_connector->hpd.hpd);
@@ -1040,7 +1039,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
@@ -1070,7 +1069,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
wm_high.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v10_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v10_0_latency_watermark(&wm_high), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1109,7 +1108,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for low clocks */
- latency_watermark_b = min(dce_v10_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v10_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1142,8 +1141,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
- amdgpu_crtc->wm_low = latency_watermark_b;
+
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
@@ -1300,7 +1298,7 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1370,7 +1368,7 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -1423,8 +1421,7 @@ static void dce_v10_0_audio_enable(struct amdgpu_device *adev,
enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
}
-static const u32 pin_offsets[] =
-{
+static const u32 pin_offsets[] = {
AUD0_REGISTER_OFFSET,
AUD1_REGISTER_OFFSET,
AUD2_REGISTER_OFFSET,
@@ -1464,17 +1461,12 @@ static int dce_v10_0_audio_init(struct amdgpu_device *adev)
static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1809,8 +1801,7 @@ static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
}
}
-static const u32 vga_control_regs[6] =
-{
+static const u32 vga_control_regs[6] = {
mmD1VGA_CONTROL,
mmD2VGA_CONTROL,
mmD3VGA_CONTROL,
@@ -1862,7 +1853,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
u32 tmp, viewport_w, viewport_h;
int r;
bool bypass_lut = false;
- struct drm_format_name_buf format_name;
/* no fb bound */
if (!atomic && !crtc->primary->fb) {
@@ -1885,6 +1875,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -1981,8 +1972,8 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
#endif
break;
default:
- DRM_ERROR("Unsupported screen format %s\n",
- drm_get_format_name(target_fb->format->format, &format_name));
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
return -EINVAL;
}
@@ -2405,6 +2396,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2533,7 +2525,7 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
break;
}
/* adjust pm to dpms */
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_dpm_compute_clocks(adev);
}
static void dce_v10_0_crtc_prepare(struct drm_crtc *crtc)
@@ -2689,6 +2681,32 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v10_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v10_0_panic_flush,
+};
+
static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2736,13 +2754,14 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v10_0_early_init(void *handle)
+static int dce_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg;
@@ -2767,10 +2786,10 @@ static int dce_v10_0_early_init(void *handle)
return 0;
}
-static int dce_v10_0_sw_init(void *handle)
+static int dce_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2799,7 +2818,7 @@ static int dce_v10_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
r = amdgpu_display_modeset_create_props(adev);
if (r)
@@ -2829,17 +2848,28 @@ static int dce_v10_0_sw_init(void *handle)
if (r)
return r;
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
drm_kms_helper_poll_init(adev_to_drm(adev));
adev->mode_info.mode_config_initialized = true;
return 0;
}
-static int dce_v10_0_sw_fini(void *handle)
+static int dce_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2853,10 +2883,10 @@ static int dce_v10_0_sw_fini(void *handle)
return 0;
}
-static int dce_v10_0_hw_init(void *handle)
+static int dce_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v10_0_init_golden_registers(adev);
@@ -2878,10 +2908,10 @@ static int dce_v10_0_hw_init(void *handle)
return 0;
}
-static int dce_v10_0_hw_fini(void *handle)
+static int dce_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v10_0_hpd_fini(adev);
@@ -2891,28 +2921,35 @@ static int dce_v10_0_hw_fini(void *handle)
dce_v10_0_pageflip_interrupt_fini(adev);
+ flush_delayed_work(&adev->hotplug_work);
+
return 0;
}
-static int dce_v10_0_suspend(void *handle)
+static int dce_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v10_0_hw_fini(handle);
+ return dce_v10_0_hw_fini(ip_block);
}
-static int dce_v10_0_resume(void *handle)
+static int dce_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v10_0_hw_init(handle);
+ ret = dce_v10_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2921,31 +2958,28 @@ static int dce_v10_0_resume(void *handle)
amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
bl_level);
}
+ if (ret)
+ return ret;
- return ret;
+ return amdgpu_display_resume_helper(adev);
}
-static bool dce_v10_0_is_idle(void *handle)
+static bool dce_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v10_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static bool dce_v10_0_check_soft_reset(void *handle)
+static bool dce_v10_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return dce_v10_0_is_display_hung(adev);
}
-static int dce_v10_0_soft_reset(void *handle)
+static int dce_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v10_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -3035,7 +3069,7 @@ static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
@@ -3187,7 +3221,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return;
}
@@ -3282,20 +3316,20 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev,
if (disp_int & mask) {
dce_v10_0_hpd_int_ack(adev, hpd);
- schedule_work(&adev->hotplug_work);
+ schedule_delayed_work(&adev->hotplug_work, 0);
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
return 0;
}
-static int dce_v10_0_set_clockgating_state(void *handle,
+static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v10_0_set_powergating_state(void *handle,
+static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3304,7 +3338,6 @@ static int dce_v10_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.name = "dce_v10_0",
.early_init = dce_v10_0_early_init,
- .late_init = NULL,
.sw_init = dce_v10_0_sw_init,
.sw_fini = dce_v10_0_sw_fini,
.hw_init = dce_v10_0_hw_init,
@@ -3312,7 +3345,6 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.suspend = dce_v10_0_suspend,
.resume = dce_v10_0_resume,
.is_idle = dce_v10_0_is_idle,
- .wait_for_idle = dce_v10_0_wait_for_idle,
.check_soft_reset = dce_v10_0_check_soft_reset,
.soft_reset = dce_v10_0_soft_reset,
.set_clockgating_state = dce_v10_0_set_clockgating_state,
@@ -3630,8 +3662,7 @@ static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev)
adev->hpd_irq.funcs = &dce_v10_0_hpd_irq_funcs;
}
-const struct amdgpu_ip_block_version dce_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 10,
.minor = 0,
@@ -3639,8 +3670,7 @@ const struct amdgpu_ip_block_version dce_v10_0_ip_block =
.funcs = &dce_v10_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v10_1_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v10_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 10,
.minor = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
deleted file mode 100644
index 1b6ff0470011..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ /dev/null
@@ -1,3778 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drm_fourcc.h>
-#include <drm/drm_vblank.h>
-
-#include "amdgpu.h"
-#include "amdgpu_pm.h"
-#include "amdgpu_i2c.h"
-#include "vid.h"
-#include "atom.h"
-#include "amdgpu_atombios.h"
-#include "atombios_crtc.h"
-#include "atombios_encoders.h"
-#include "amdgpu_pll.h"
-#include "amdgpu_connectors.h"
-#include "amdgpu_display.h"
-#include "dce_v11_0.h"
-
-#include "dce/dce_11_0_d.h"
-#include "dce/dce_11_0_sh_mask.h"
-#include "dce/dce_11_0_enum.h"
-#include "oss/oss_3_0_d.h"
-#include "oss/oss_3_0_sh_mask.h"
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
-
-#include "ivsrcid/ivsrcid_vislands30.h"
-
-static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);
-static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev);
-
-static const u32 crtc_offsets[] =
-{
- CRTC0_REGISTER_OFFSET,
- CRTC1_REGISTER_OFFSET,
- CRTC2_REGISTER_OFFSET,
- CRTC3_REGISTER_OFFSET,
- CRTC4_REGISTER_OFFSET,
- CRTC5_REGISTER_OFFSET,
- CRTC6_REGISTER_OFFSET
-};
-
-static const u32 hpd_offsets[] =
-{
- HPD0_REGISTER_OFFSET,
- HPD1_REGISTER_OFFSET,
- HPD2_REGISTER_OFFSET,
- HPD3_REGISTER_OFFSET,
- HPD4_REGISTER_OFFSET,
- HPD5_REGISTER_OFFSET
-};
-
-static const uint32_t dig_offsets[] = {
- DIG0_REGISTER_OFFSET,
- DIG1_REGISTER_OFFSET,
- DIG2_REGISTER_OFFSET,
- DIG3_REGISTER_OFFSET,
- DIG4_REGISTER_OFFSET,
- DIG5_REGISTER_OFFSET,
- DIG6_REGISTER_OFFSET,
- DIG7_REGISTER_OFFSET,
- DIG8_REGISTER_OFFSET
-};
-
-static const struct {
- uint32_t reg;
- uint32_t vblank;
- uint32_t vline;
- uint32_t hpd;
-
-} interrupt_status_offsets[] = { {
- .reg = mmDISP_INTERRUPT_STATUS,
- .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
-} };
-
-static const u32 cz_golden_settings_a11[] =
-{
- mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
- mmFBC_MISC, 0x1f311fff, 0x14300000,
-};
-
-static const u32 cz_mgcg_cgcg_init[] =
-{
- mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
- mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
-};
-
-static const u32 stoney_golden_settings_a11[] =
-{
- mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
- mmFBC_MISC, 0x1f311fff, 0x14302000,
-};
-
-static const u32 polaris11_golden_settings_a11[] =
-{
- mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
- mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
- mmFBC_DEBUG1, 0xffffffff, 0x00000008,
- mmFBC_MISC, 0x9f313fff, 0x14302008,
- mmHDMI_CONTROL, 0x313f031f, 0x00000011,
-};
-
-static const u32 polaris10_golden_settings_a11[] =
-{
- mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
- mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
- mmFBC_MISC, 0x9f313fff, 0x14302008,
- mmHDMI_CONTROL, 0x313f031f, 0x00000011,
-};
-
-static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
-{
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- amdgpu_device_program_register_sequence(adev,
- cz_mgcg_cgcg_init,
- ARRAY_SIZE(cz_mgcg_cgcg_init));
- amdgpu_device_program_register_sequence(adev,
- cz_golden_settings_a11,
- ARRAY_SIZE(cz_golden_settings_a11));
- break;
- case CHIP_STONEY:
- amdgpu_device_program_register_sequence(adev,
- stoney_golden_settings_a11,
- ARRAY_SIZE(stoney_golden_settings_a11));
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- amdgpu_device_program_register_sequence(adev,
- polaris11_golden_settings_a11,
- ARRAY_SIZE(polaris11_golden_settings_a11));
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- amdgpu_device_program_register_sequence(adev,
- polaris10_golden_settings_a11,
- ARRAY_SIZE(polaris10_golden_settings_a11));
- break;
- default:
- break;
- }
-}
-
-static u32 dce_v11_0_audio_endpt_rreg(struct amdgpu_device *adev,
- u32 block_offset, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
- r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
-
- return r;
-}
-
-static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev,
- u32 block_offset, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
-}
-
-static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
-{
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
- return 0;
- else
- return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
-}
-
-static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev)
-{
- unsigned i;
-
- /* Enable pflip interrupts */
- for (i = 0; i < adev->mode_info.num_crtc; i++)
- amdgpu_irq_get(adev, &adev->pageflip_irq, i);
-}
-
-static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
-{
- unsigned i;
-
- /* Disable pflip interrupts */
- for (i = 0; i < adev->mode_info.num_crtc; i++)
- amdgpu_irq_put(adev, &adev->pageflip_irq, i);
-}
-
-/**
- * dce_v11_0_page_flip - pageflip callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc_id: crtc to cleanup pageflip on
- * @crtc_base: new address of the crtc (GPU MC address)
- * @async: asynchronous flip
- *
- * Triggers the actual pageflip by updating the primary
- * surface base address.
- */
-static void dce_v11_0_page_flip(struct amdgpu_device *adev,
- int crtc_id, u64 crtc_base, bool async)
-{
- struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
- struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
- u32 tmp;
-
- /* flip immediate for async, default is vsync */
- tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
- GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
- WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* update pitch */
- WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
- fb->pitches[0] / fb->format->cpp[0]);
- /* update the scanout addresses */
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(crtc_base));
- /* writing to the low address triggers the update */
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(crtc_base));
- /* post the write */
- RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
-}
-
-static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
- u32 *vbl, u32 *position)
-{
- if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
- return -EINVAL;
-
- *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
- *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
- return 0;
-}
-
-/**
- * dce_v11_0_hpd_sense - hpd sense callback.
- *
- * @adev: amdgpu_device pointer
- * @hpd: hpd (hotplug detect) pin
- *
- * Checks if a digital monitor is connected (evergreen+).
- * Returns true if connected, false if not connected.
- */
-static bool dce_v11_0_hpd_sense(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- bool connected = false;
-
- if (hpd >= adev->mode_info.num_hpd)
- return connected;
-
- if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
- DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
- connected = true;
-
- return connected;
-}
-
-/**
- * dce_v11_0_hpd_set_polarity - hpd set polarity callback.
- *
- * @adev: amdgpu_device pointer
- * @hpd: hpd (hotplug detect) pin
- *
- * Set the polarity of the hpd pin (evergreen+).
- */
-static void dce_v11_0_hpd_set_polarity(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- u32 tmp;
- bool connected = dce_v11_0_hpd_sense(adev, hpd);
-
- if (hpd >= adev->mode_info.num_hpd)
- return;
-
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- if (connected)
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
- else
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
-}
-
-/**
- * dce_v11_0_hpd_init - hpd setup callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Setup the hpd pins used by the card (evergreen+).
- * Enable the pin, set the polarity, and enable the hpd interrupts.
- */
-static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- u32 tmp;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
- continue;
-
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
- connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
- /* don't try to enable hpd on eDP or LVDS avoid breaking the
- * aux dp channel on imac and help (but not completely fix)
- * https://bugzilla.redhat.com/show_bug.cgi?id=726143
- * also avoid interrupt storms during dpms.
- */
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
- continue;
- }
-
- tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
- WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
- DC_HPD_CONNECT_INT_DELAY,
- AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
- tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
- DC_HPD_DISCONNECT_INT_DELAY,
- AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
- WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
- amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
- }
- drm_connector_list_iter_end(&iter);
-}
-
-/**
- * dce_v11_0_hpd_fini - hpd tear down callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down the hpd pins used by the card (evergreen+).
- * Disable the hpd interrupts.
- */
-static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- u32 tmp;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
- continue;
-
- tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
- WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
- }
- drm_connector_list_iter_end(&iter);
-}
-
-static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
-{
- return mmDC_GPIO_HPD_A;
-}
-
-static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev)
-{
- u32 crtc_hung = 0;
- u32 crtc_status[6];
- u32 i, j, tmp;
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
- if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) {
- crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
- crtc_hung |= (1 << i);
- }
- }
-
- for (j = 0; j < 10; j++) {
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (crtc_hung & (1 << i)) {
- tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
- if (tmp != crtc_status[i])
- crtc_hung &= ~(1 << i);
- }
- }
- if (crtc_hung == 0)
- return false;
- udelay(100);
- }
-
- return true;
-}
-
-static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev,
- bool render)
-{
- u32 tmp;
-
- /* Lockout access through VGA aperture*/
- tmp = RREG32(mmVGA_HDP_CONTROL);
- if (render)
- tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
- else
- tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
- WREG32(mmVGA_HDP_CONTROL, tmp);
-
- /* disable VGA render */
- tmp = RREG32(mmVGA_RENDER_CONTROL);
- if (render)
- tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
- else
- tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
- WREG32(mmVGA_RENDER_CONTROL, tmp);
-}
-
-static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
-{
- int num_crtc = 0;
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- num_crtc = 3;
- break;
- case CHIP_STONEY:
- num_crtc = 2;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- num_crtc = 6;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- num_crtc = 5;
- break;
- default:
- num_crtc = 0;
- }
- return num_crtc;
-}
-
-void dce_v11_0_disable_dce(struct amdgpu_device *adev)
-{
- /*Disable VGA render and enabled crtc, if has DCE engine*/
- if (amdgpu_atombios_has_dce_engine_info(adev)) {
- u32 tmp;
- int crtc_enabled, i;
-
- dce_v11_0_set_vga_render_state(adev, false);
-
- /*Disable crtc*/
- for (i = 0; i < dce_v11_0_get_num_crtc(adev); i++) {
- crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
- CRTC_CONTROL, CRTC_MASTER_EN);
- if (crtc_enabled) {
- WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
- tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
- tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
- WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
- WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
- }
- }
- }
-}
-
-static void dce_v11_0_program_fmt(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
- int bpc = 0;
- u32 tmp = 0;
- enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
-
- if (connector) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- bpc = amdgpu_connector_get_monitor_bpc(connector);
- dither = amdgpu_connector->dither;
- }
-
- /* LVDS/eDP FMT is set up by atom */
- if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
- return;
-
- /* not needed for analog */
- if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
- (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
- return;
-
- if (bpc == 0)
- return;
-
- switch (bpc) {
- case 6:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0);
- }
- break;
- case 8:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1);
- }
- break;
- case 10:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2);
- }
- break;
- default:
- /* not needed */
- break;
- }
-
- WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-
-/* display watermark setup */
-/**
- * dce_v11_0_line_buffer_adjust - Set up the line buffer
- *
- * @adev: amdgpu_device pointer
- * @amdgpu_crtc: the selected display controller
- * @mode: the current display mode on the selected display
- * controller
- *
- * Setup up the line buffer allocation for
- * the selected display controller (CIK).
- * Returns the line buffer size in pixels.
- */
-static u32 dce_v11_0_line_buffer_adjust(struct amdgpu_device *adev,
- struct amdgpu_crtc *amdgpu_crtc,
- struct drm_display_mode *mode)
-{
- u32 tmp, buffer_alloc, i, mem_cfg;
- u32 pipe_offset = amdgpu_crtc->crtc_id;
- /*
- * Line Buffer Setup
- * There are 6 line buffers, one for each display controllers.
- * There are 3 partitions per LB. Select the number of partitions
- * to enable based on the display width. For display widths larger
- * than 4096, you need use to use 2 display controllers and combine
- * them using the stereo blender.
- */
- if (amdgpu_crtc->base.enabled && mode) {
- if (mode->crtc_hdisplay < 1920) {
- mem_cfg = 1;
- buffer_alloc = 2;
- } else if (mode->crtc_hdisplay < 2560) {
- mem_cfg = 2;
- buffer_alloc = 2;
- } else if (mode->crtc_hdisplay < 4096) {
- mem_cfg = 0;
- buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
- } else {
- DRM_DEBUG_KMS("Mode too big for LB!\n");
- mem_cfg = 0;
- buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
- }
- } else {
- mem_cfg = 1;
- buffer_alloc = 0;
- }
-
- tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg);
- WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
- tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc);
- WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
- if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED))
- break;
- udelay(1);
- }
-
- if (amdgpu_crtc->base.enabled && mode) {
- switch (mem_cfg) {
- case 0:
- default:
- return 4096 * 2;
- case 1:
- return 1920 * 2;
- case 2:
- return 2560 * 2;
- }
- }
-
- /* controller not enabled, so no lb used */
- return 0;
-}
-
-/**
- * cik_get_number_of_dram_channels - get the number of dram channels
- *
- * @adev: amdgpu_device pointer
- *
- * Look up the number of video ram channels (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the number of dram channels
- */
-static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
-{
- u32 tmp = RREG32(mmMC_SHARED_CHMAP);
-
- switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
- case 0:
- default:
- return 1;
- case 1:
- return 2;
- case 2:
- return 4;
- case 3:
- return 8;
- case 4:
- return 3;
- case 5:
- return 6;
- case 6:
- return 10;
- case 7:
- return 12;
- case 8:
- return 16;
- }
-}
-
-struct dce10_wm_params {
- u32 dram_channels; /* number of dram channels */
- u32 yclk; /* bandwidth per dram data pin in kHz */
- u32 sclk; /* engine clock in kHz */
- u32 disp_clk; /* display clock in kHz */
- u32 src_width; /* viewport width */
- u32 active_time; /* active display time in ns */
- u32 blank_time; /* blank time in ns */
- bool interlaced; /* mode is interlaced */
- fixed20_12 vsc; /* vertical scale ratio */
- u32 num_heads; /* number of active crtcs */
- u32 bytes_per_pixel; /* bytes per pixel display + overlay */
- u32 lb_size; /* line buffer allocated to pipe */
- u32 vtaps; /* vertical scaler taps */
-};
-
-/**
- * dce_v11_0_dram_bandwidth - get the dram bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the raw dram bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dram bandwidth in MBytes/s
- */
-static u32 dce_v11_0_dram_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate raw DRAM Bandwidth */
- fixed20_12 dram_efficiency; /* 0.7 */
- fixed20_12 yclk, dram_channels, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- yclk.full = dfixed_const(wm->yclk);
- yclk.full = dfixed_div(yclk, a);
- dram_channels.full = dfixed_const(wm->dram_channels * 4);
- a.full = dfixed_const(10);
- dram_efficiency.full = dfixed_const(7);
- dram_efficiency.full = dfixed_div(dram_efficiency, a);
- bandwidth.full = dfixed_mul(dram_channels, yclk);
- bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_dram_bandwidth_for_display - get the dram bandwidth for display
- *
- * @wm: watermark calculation data
- *
- * Calculate the dram bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dram bandwidth for display in MBytes/s
- */
-static u32 dce_v11_0_dram_bandwidth_for_display(struct dce10_wm_params *wm)
-{
- /* Calculate DRAM Bandwidth and the part allocated to display. */
- fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
- fixed20_12 yclk, dram_channels, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- yclk.full = dfixed_const(wm->yclk);
- yclk.full = dfixed_div(yclk, a);
- dram_channels.full = dfixed_const(wm->dram_channels * 4);
- a.full = dfixed_const(10);
- disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
- disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
- bandwidth.full = dfixed_mul(dram_channels, yclk);
- bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_data_return_bandwidth - get the data return bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the data return bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the data return bandwidth in MBytes/s
- */
-static u32 dce_v11_0_data_return_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the display Data return Bandwidth */
- fixed20_12 return_efficiency; /* 0.8 */
- fixed20_12 sclk, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- sclk.full = dfixed_const(wm->sclk);
- sclk.full = dfixed_div(sclk, a);
- a.full = dfixed_const(10);
- return_efficiency.full = dfixed_const(8);
- return_efficiency.full = dfixed_div(return_efficiency, a);
- a.full = dfixed_const(32);
- bandwidth.full = dfixed_mul(a, sclk);
- bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_dmif_request_bandwidth - get the dmif bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the dmif bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dmif bandwidth in MBytes/s
- */
-static u32 dce_v11_0_dmif_request_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the DMIF Request Bandwidth */
- fixed20_12 disp_clk_request_efficiency; /* 0.8 */
- fixed20_12 disp_clk, bandwidth;
- fixed20_12 a, b;
-
- a.full = dfixed_const(1000);
- disp_clk.full = dfixed_const(wm->disp_clk);
- disp_clk.full = dfixed_div(disp_clk, a);
- a.full = dfixed_const(32);
- b.full = dfixed_mul(a, disp_clk);
-
- a.full = dfixed_const(10);
- disp_clk_request_efficiency.full = dfixed_const(8);
- disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
-
- bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_available_bandwidth - get the min available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the min available bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the min available bandwidth in MBytes/s
- */
-static u32 dce_v11_0_available_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
- u32 dram_bandwidth = dce_v11_0_dram_bandwidth(wm);
- u32 data_return_bandwidth = dce_v11_0_data_return_bandwidth(wm);
- u32 dmif_req_bandwidth = dce_v11_0_dmif_request_bandwidth(wm);
-
- return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
-}
-
-/**
- * dce_v11_0_average_bandwidth - get the average available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the average available bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the average available bandwidth in MBytes/s
- */
-static u32 dce_v11_0_average_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the display mode Average Bandwidth
- * DisplayMode should contain the source and destination dimensions,
- * timing, etc.
- */
- fixed20_12 bpp;
- fixed20_12 line_time;
- fixed20_12 src_width;
- fixed20_12 bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- line_time.full = dfixed_const(wm->active_time + wm->blank_time);
- line_time.full = dfixed_div(line_time, a);
- bpp.full = dfixed_const(wm->bytes_per_pixel);
- src_width.full = dfixed_const(wm->src_width);
- bandwidth.full = dfixed_mul(src_width, bpp);
- bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
- bandwidth.full = dfixed_div(bandwidth, line_time);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_latency_watermark - get the latency watermark
- *
- * @wm: watermark calculation data
- *
- * Calculate the latency watermark (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the latency watermark in ns
- */
-static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm)
-{
- /* First calculate the latency in ns */
- u32 mc_latency = 2000; /* 2000 ns. */
- u32 available_bandwidth = dce_v11_0_available_bandwidth(wm);
- u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
- u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
- u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
- u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
- (wm->num_heads * cursor_line_pair_return_time);
- u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
- u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
- u32 tmp, dmif_size = 12288;
- fixed20_12 a, b, c;
-
- if (wm->num_heads == 0)
- return 0;
-
- a.full = dfixed_const(2);
- b.full = dfixed_const(1);
- if ((wm->vsc.full > a.full) ||
- ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
- (wm->vtaps >= 5) ||
- ((wm->vsc.full >= a.full) && wm->interlaced))
- max_src_lines_per_dst_line = 4;
- else
- max_src_lines_per_dst_line = 2;
-
- a.full = dfixed_const(available_bandwidth);
- b.full = dfixed_const(wm->num_heads);
- a.full = dfixed_div(a, b);
- tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
- tmp = min(dfixed_trunc(a), tmp);
-
- lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
-
- a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
- b.full = dfixed_const(1000);
- c.full = dfixed_const(lb_fill_bw);
- b.full = dfixed_div(c, b);
- a.full = dfixed_div(a, b);
- line_fill_time = dfixed_trunc(a);
-
- if (line_fill_time < wm->active_time)
- return latency;
- else
- return latency + (line_fill_time - wm->active_time);
-
-}
-
-/**
- * dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display - check
- * average and available dram bandwidth
- *
- * @wm: watermark calculation data
- *
- * Check if the display average bandwidth fits in the display
- * dram bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm)
-{
- if (dce_v11_0_average_bandwidth(wm) <=
- (dce_v11_0_dram_bandwidth_for_display(wm) / wm->num_heads))
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_average_bandwidth_vs_available_bandwidth - check
- * average and available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Check if the display average bandwidth fits in the display
- * available bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm)
-{
- if (dce_v11_0_average_bandwidth(wm) <=
- (dce_v11_0_available_bandwidth(wm) / wm->num_heads))
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_check_latency_hiding - check latency hiding
- *
- * @wm: watermark calculation data
- *
- * Check latency hiding (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_check_latency_hiding(struct dce10_wm_params *wm)
-{
- u32 lb_partitions = wm->lb_size / wm->src_width;
- u32 line_time = wm->active_time + wm->blank_time;
- u32 latency_tolerant_lines;
- u32 latency_hiding;
- fixed20_12 a;
-
- a.full = dfixed_const(1);
- if (wm->vsc.full > a.full)
- latency_tolerant_lines = 1;
- else {
- if (lb_partitions <= (wm->vtaps + 1))
- latency_tolerant_lines = 1;
- else
- latency_tolerant_lines = 2;
- }
-
- latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
-
- if (dce_v11_0_latency_watermark(wm) <= latency_hiding)
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_program_watermarks - program display watermarks
- *
- * @adev: amdgpu_device pointer
- * @amdgpu_crtc: the selected display controller
- * @lb_size: line buffer size
- * @num_heads: number of display controllers in use
- *
- * Calculate and program the display watermarks for the
- * selected display controller (CIK).
- */
-static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
- struct amdgpu_crtc *amdgpu_crtc,
- u32 lb_size, u32 num_heads)
-{
- struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
- struct dce10_wm_params wm_low, wm_high;
- u32 active_time;
- u32 line_time = 0;
- u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
-
- if (amdgpu_crtc->base.enabled && num_heads && mode) {
- active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
- (u32)mode->clock);
- line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
- (u32)mode->clock);
- line_time = min(line_time, (u32)65535);
-
- /* watermark for high clocks */
- if (adev->pm.dpm_enabled) {
- wm_high.yclk =
- amdgpu_dpm_get_mclk(adev, false) * 10;
- wm_high.sclk =
- amdgpu_dpm_get_sclk(adev, false) * 10;
- } else {
- wm_high.yclk = adev->pm.current_mclk * 10;
- wm_high.sclk = adev->pm.current_sclk * 10;
- }
-
- wm_high.disp_clk = mode->clock;
- wm_high.src_width = mode->crtc_hdisplay;
- wm_high.active_time = active_time;
- wm_high.blank_time = line_time - wm_high.active_time;
- wm_high.interlaced = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- wm_high.interlaced = true;
- wm_high.vsc = amdgpu_crtc->vsc;
- wm_high.vtaps = 1;
- if (amdgpu_crtc->rmx_type != RMX_OFF)
- wm_high.vtaps = 2;
- wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
- wm_high.lb_size = lb_size;
- wm_high.dram_channels = cik_get_number_of_dram_channels(adev);
- wm_high.num_heads = num_heads;
-
- /* set for high clocks */
- latency_watermark_a = min(dce_v11_0_latency_watermark(&wm_high), (u32)65535);
-
- /* possibly force display priority to high */
- /* should really do this at mode validation time... */
- if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
- !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
- !dce_v11_0_check_latency_hiding(&wm_high) ||
- (adev->mode_info.disp_priority == 2)) {
- DRM_DEBUG_KMS("force priority to high\n");
- }
-
- /* watermark for low clocks */
- if (adev->pm.dpm_enabled) {
- wm_low.yclk =
- amdgpu_dpm_get_mclk(adev, true) * 10;
- wm_low.sclk =
- amdgpu_dpm_get_sclk(adev, true) * 10;
- } else {
- wm_low.yclk = adev->pm.current_mclk * 10;
- wm_low.sclk = adev->pm.current_sclk * 10;
- }
-
- wm_low.disp_clk = mode->clock;
- wm_low.src_width = mode->crtc_hdisplay;
- wm_low.active_time = active_time;
- wm_low.blank_time = line_time - wm_low.active_time;
- wm_low.interlaced = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- wm_low.interlaced = true;
- wm_low.vsc = amdgpu_crtc->vsc;
- wm_low.vtaps = 1;
- if (amdgpu_crtc->rmx_type != RMX_OFF)
- wm_low.vtaps = 2;
- wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
- wm_low.lb_size = lb_size;
- wm_low.dram_channels = cik_get_number_of_dram_channels(adev);
- wm_low.num_heads = num_heads;
-
- /* set for low clocks */
- latency_watermark_b = min(dce_v11_0_latency_watermark(&wm_low), (u32)65535);
-
- /* possibly force display priority to high */
- /* should really do this at mode validation time... */
- if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
- !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
- !dce_v11_0_check_latency_hiding(&wm_low) ||
- (adev->mode_info.disp_priority == 2)) {
- DRM_DEBUG_KMS("force priority to high\n");
- }
- lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
- }
-
- /* select wm A */
- wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 1);
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
- WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* select wm B */
- tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2);
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
- WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* restore original selection */
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
-
- /* save values for DPM */
- amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
- amdgpu_crtc->wm_low = latency_watermark_b;
- /* Save number of lines the linebuffer leads before the scanout */
- amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
-}
-
-/**
- * dce_v11_0_bandwidth_update - program display watermarks
- *
- * @adev: amdgpu_device pointer
- *
- * Calculate and program the display watermarks and line
- * buffer allocation (CIK).
- */
-static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev)
-{
- struct drm_display_mode *mode = NULL;
- u32 num_heads = 0, lb_size;
- int i;
-
- amdgpu_display_update_priority(adev);
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->mode_info.crtcs[i]->base.enabled)
- num_heads++;
- }
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- mode = &adev->mode_info.crtcs[i]->base.mode;
- lb_size = dce_v11_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode);
- dce_v11_0_program_watermarks(adev, adev->mode_info.crtcs[i],
- lb_size, num_heads);
- }
-}
-
-static void dce_v11_0_audio_get_connected_pins(struct amdgpu_device *adev)
-{
- int i;
- u32 offset, tmp;
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- offset = adev->mode_info.audio.pin[i].offset;
- tmp = RREG32_AUDIO_ENDPT(offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
- if (((tmp &
- AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
- AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1)
- adev->mode_info.audio.pin[i].connected = false;
- else
- adev->mode_info.audio.pin[i].connected = true;
- }
-}
-
-static struct amdgpu_audio_pin *dce_v11_0_audio_get_pin(struct amdgpu_device *adev)
-{
- int i;
-
- dce_v11_0_audio_get_connected_pins(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- if (adev->mode_info.audio.pin[i].connected)
- return &adev->mode_info.audio.pin[i];
- }
- DRM_ERROR("No connected audio pins found!\n");
- return NULL;
-}
-
-static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder)
-{
- struct amdgpu_device *adev = drm_to_adev(encoder->dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- u32 tmp;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- tmp = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, dig->afmt->pin->id);
- WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, tmp);
-}
-
-static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- u32 tmp;
- int interlace = 0;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- interlace = 1;
- if (connector->latency_present[interlace]) {
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- VIDEO_LIPSYNC, connector->video_latency[interlace]);
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- AUDIO_LIPSYNC, connector->audio_latency[interlace]);
- } else {
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- VIDEO_LIPSYNC, 0);
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- AUDIO_LIPSYNC, 0);
- }
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
-}
-
-static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- u32 tmp;
- u8 *sadb = NULL;
- int sad_count;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
- if (sad_count < 0) {
- DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
- sad_count = 0;
- }
-
- /* program the speaker allocation */
- tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- DP_CONNECTION, 0);
- /* set HDMI mode */
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- HDMI_CONNECTION, 1);
- if (sad_count)
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- SPEAKER_ALLOCATION, sadb[0]);
- else
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- SPEAKER_ALLOCATION, 5); /* stereo */
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
-
- kfree(sadb);
-}
-
-static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- struct cea_sad *sads;
- int i, sad_count;
-
- static const u16 eld_reg_to_type[][2] = {
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
- };
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count < 0)
- DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
- if (sad_count <= 0)
- return;
- BUG_ON(!sads);
-
- for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
- u32 tmp = 0;
- u8 stereo_freqs = 0;
- int max_channels = -1;
- int j;
-
- for (j = 0; j < sad_count; j++) {
- struct cea_sad *sad = &sads[j];
-
- if (sad->format == eld_reg_to_type[i][1]) {
- if (sad->channels > max_channels) {
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- MAX_CHANNELS, sad->channels);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- DESCRIPTOR_BYTE_2, sad->byte2);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES, sad->freq);
- max_channels = sad->channels;
- }
-
- if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
- stereo_freqs |= sad->freq;
- else
- break;
- }
- }
-
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
- }
-
- kfree(sads);
-}
-
-static void dce_v11_0_audio_enable(struct amdgpu_device *adev,
- struct amdgpu_audio_pin *pin,
- bool enable)
-{
- if (!pin)
- return;
-
- WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
- enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
-}
-
-static const u32 pin_offsets[] =
-{
- AUD0_REGISTER_OFFSET,
- AUD1_REGISTER_OFFSET,
- AUD2_REGISTER_OFFSET,
- AUD3_REGISTER_OFFSET,
- AUD4_REGISTER_OFFSET,
- AUD5_REGISTER_OFFSET,
- AUD6_REGISTER_OFFSET,
- AUD7_REGISTER_OFFSET,
-};
-
-static int dce_v11_0_audio_init(struct amdgpu_device *adev)
-{
- int i;
-
- if (!amdgpu_audio)
- return 0;
-
- adev->mode_info.audio.enabled = true;
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- adev->mode_info.audio.num_pins = 7;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- adev->mode_info.audio.num_pins = 8;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- adev->mode_info.audio.num_pins = 6;
- break;
- default:
- return -EINVAL;
- }
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- adev->mode_info.audio.pin[i].channels = -1;
- adev->mode_info.audio.pin[i].rate = -1;
- adev->mode_info.audio.pin[i].bits_per_sample = -1;
- adev->mode_info.audio.pin[i].status_bits = 0;
- adev->mode_info.audio.pin[i].category_code = 0;
- adev->mode_info.audio.pin[i].connected = false;
- adev->mode_info.audio.pin[i].offset = pin_offsets[i];
- adev->mode_info.audio.pin[i].id = i;
- /* disable audio. it will be set up later */
- /* XXX remove once we switch to ip funcs */
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- return 0;
-}
-
-static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
-{
- int i;
-
- if (!amdgpu_audio)
- return;
-
- if (!adev->mode_info.audio.enabled)
- return;
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
- adev->mode_info.audio.enabled = false;
-}
-
-/*
- * update the N and CTS parameters for a given pixel clock rate
- */
-static void dce_v11_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- u32 tmp;
-
- tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
- WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
- WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
- WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
- WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
- WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
- WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
-
-}
-
-/*
- * build a HDMI Video Info Frame
- */
-static void dce_v11_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
- void *buffer, size_t size)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- uint8_t *frame = buffer + 3;
- uint8_t *header = buffer;
-
- WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
- frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24));
- WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
- frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24));
- WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
- frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24));
- WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
- frame[0xC] | (frame[0xD] << 8) | (header[1] << 24));
-}
-
-static void dce_v11_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- u32 dto_phase = 24 * 1000;
- u32 dto_modulo = clock;
- u32 tmp;
-
- if (!dig || !dig->afmt)
- return;
-
- /* XXX two dtos; generally use dto0 for hdmi */
- /* Express [24MHz / target pixel clock] as an exact rational
- * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
- * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
- */
- tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
- tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL,
- amdgpu_crtc->crtc_id);
- WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
- WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
- WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
-}
-
-/*
- * update the info frames with the data from the current display mode
- */
-static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
- u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
- struct hdmi_avi_infoframe frame;
- ssize_t err;
- u32 tmp;
- int bpc = 8;
-
- if (!dig || !dig->afmt)
- return;
-
- /* Silent, r600_hdmi_enable will raise WARN for us */
- if (!dig->afmt->enabled)
- return;
-
- /* hdmi deep color mode general control packets setup, if bpc > 8 */
- if (encoder->crtc) {
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- bpc = amdgpu_crtc->bpc;
- }
-
- /* disable audio prior to setting up hw */
- dig->afmt->pin = dce_v11_0_audio_get_pin(adev);
- dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
-
- dce_v11_0_audio_set_dto(encoder, mode->clock);
-
- tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
- WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */
-
- WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000);
-
- tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset);
- switch (bpc) {
- case 0:
- case 6:
- case 8:
- case 16:
- default:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
- DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
- connector->name, bpc);
- break;
- case 10:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1);
- DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
- connector->name);
- break;
- case 12:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2);
- DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
- connector->name);
- break;
- }
- WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */
- WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* enable audio info frames (frames won't be set until audio is enabled) */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
- WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
- WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
- /* anything other than 0 */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2);
- WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
-
- WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */
-
- tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* set the default audio delay */
- tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
- /* should be suffient for all audio modes and small enough for all hblanks */
- tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
- WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* allow 60958 channel status fields to be updated */
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
- if (bpc > 8)
- /* clear SW CTS value */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0);
- else
- /* select SW CTS value */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1);
- /* allow hw to sent ACR packets when required */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
- WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- dce_v11_0_afmt_update_ACR(encoder, mode->clock);
-
- tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
- WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
- WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
- WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
-
- dce_v11_0_audio_write_speaker_allocation(encoder);
-
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset,
- (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
-
- dce_v11_0_afmt_audio_select_pin(encoder);
- dce_v11_0_audio_write_sad_regs(encoder);
- dce_v11_0_audio_write_latency_fields(encoder, mode);
-
- err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
- if (err < 0) {
- DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
- return;
- }
-
- err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
- if (err < 0) {
- DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
- return;
- }
-
- dce_v11_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* enable AVI info frames */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
- WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
- WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* send audio packets */
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF);
- WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF);
- WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001);
- WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001);
-
- /* enable audio after to setting up hw */
- dce_v11_0_audio_enable(adev, dig->afmt->pin, true);
-}
-
-static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- if (!dig || !dig->afmt)
- return;
-
- /* Silent, r600_hdmi_enable will raise WARN for us */
- if (enable && dig->afmt->enabled)
- return;
- if (!enable && !dig->afmt->enabled)
- return;
-
- if (!enable && dig->afmt->pin) {
- dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
- dig->afmt->pin = NULL;
- }
-
- dig->afmt->enabled = enable;
-
- DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
- enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
-}
-
-static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->mode_info.num_dig; i++)
- adev->mode_info.afmt[i] = NULL;
-
- /* DCE11 has audio blocks tied to DIG encoders */
- for (i = 0; i < adev->mode_info.num_dig; i++) {
- adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
- if (adev->mode_info.afmt[i]) {
- adev->mode_info.afmt[i]->offset = dig_offsets[i];
- adev->mode_info.afmt[i]->id = i;
- } else {
- int j;
- for (j = 0; j < i; j++) {
- kfree(adev->mode_info.afmt[j]);
- adev->mode_info.afmt[j] = NULL;
- }
- return -ENOMEM;
- }
- }
- return 0;
-}
-
-static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->mode_info.num_dig; i++) {
- kfree(adev->mode_info.afmt[i]);
- adev->mode_info.afmt[i] = NULL;
- }
-}
-
-static const u32 vga_control_regs[6] =
-{
- mmD1VGA_CONTROL,
- mmD2VGA_CONTROL,
- mmD3VGA_CONTROL,
- mmD4VGA_CONTROL,
- mmD5VGA_CONTROL,
- mmD6VGA_CONTROL,
-};
-
-static void dce_v11_0_vga_enable(struct drm_crtc *crtc, bool enable)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u32 vga_control;
-
- vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
- if (enable)
- WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1);
- else
- WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control);
-}
-
-static void dce_v11_0_grph_enable(struct drm_crtc *crtc, bool enable)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- if (enable)
- WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1);
- else
- WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0);
-}
-
-static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, int atomic)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct drm_framebuffer *target_fb;
- struct drm_gem_object *obj;
- struct amdgpu_bo *abo;
- uint64_t fb_location, tiling_flags;
- uint32_t fb_format, fb_pitch_pixels;
- u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE);
- u32 pipe_config;
- u32 tmp, viewport_w, viewport_h;
- int r;
- bool bypass_lut = false;
- struct drm_format_name_buf format_name;
-
- /* no fb bound */
- if (!atomic && !crtc->primary->fb) {
- DRM_DEBUG_KMS("No FB bound\n");
- return 0;
- }
-
- if (atomic)
- target_fb = fb;
- else
- target_fb = crtc->primary->fb;
-
- /* If atomic, assume fb object is pinned & idle & fenced and
- * just update base pointers
- */
- obj = target_fb->obj[0];
- abo = gem_to_amdgpu_bo(obj);
- r = amdgpu_bo_reserve(abo, false);
- if (unlikely(r != 0))
- return r;
-
- if (!atomic) {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(abo);
- return -EINVAL;
- }
- }
- fb_location = amdgpu_bo_gpu_offset(abo);
-
- amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
- amdgpu_bo_unreserve(abo);
-
- pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
-
- switch (target_fb->format->format) {
- case DRM_FORMAT_C8:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
- break;
- case DRM_FORMAT_XRGB4444:
- case DRM_FORMAT_ARGB4444:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_XRGB1555:
- case DRM_FORMAT_ARGB1555:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_BGRX5551:
- case DRM_FORMAT_BGRA5551:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_RGB565:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_XRGB8888:
- case DRM_FORMAT_ARGB8888:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- break;
- case DRM_FORMAT_XRGB2101010:
- case DRM_FORMAT_ARGB2101010:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
- bypass_lut = true;
- break;
- case DRM_FORMAT_BGRX1010102:
- case DRM_FORMAT_BGRA1010102:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
- bypass_lut = true;
- break;
- case DRM_FORMAT_XBGR8888:
- case DRM_FORMAT_ABGR8888:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- break;
- default:
- DRM_ERROR("Unsupported screen format %s\n",
- drm_get_format_name(target_fb->format->format, &format_name));
- return -EINVAL;
- }
-
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
- unsigned bankw, bankh, mtaspect, tile_split, num_banks;
-
- bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
- bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
- mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
- tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
- num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
-
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
- ARRAY_2D_TILED_THIN1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT,
- tile_split);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT,
- mtaspect);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE,
- ADDR_SURF_MICRO_TILING_DISPLAY);
- } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
- ARRAY_1D_TILED_THIN1);
- }
-
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG,
- pipe_config);
-
- dce_v11_0_vga_enable(crtc, false);
-
- /* Make sure surface address is updated at vertical blank rather than
- * horizontal blank
- */
- tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
- GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0);
- WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(fb_location));
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(fb_location));
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
- WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
- WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
-
- /*
- * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT
- * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
- * retain the full precision throughout the pipeline.
- */
- tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset);
- if (bypass_lut)
- tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1);
- else
- tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0);
- WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp);
-
- if (bypass_lut)
- DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
-
- WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
- WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
-
- fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
- WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
-
- dce_v11_0_grph_enable(crtc, true);
-
- WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
- target_fb->height);
-
- x &= ~3;
- y &= ~1;
- WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
- (x << 16) | y);
- viewport_w = crtc->mode.hdisplay;
- viewport_h = (crtc->mode.vdisplay + 1) & ~1;
- WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
- (viewport_w << 16) | viewport_h);
-
- /* set pageflip to happen anywhere in vblank interval */
- WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
-
- if (!atomic && fb && fb != crtc->primary->fb) {
- abo = gem_to_amdgpu_bo(fb->obj[0]);
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r != 0))
- return r;
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
-
- /* Bytes per pixel may have changed */
- dce_v11_0_bandwidth_update(adev);
-
- return 0;
-}
-
-static void dce_v11_0_set_interleave(struct drm_crtc *crtc,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- u32 tmp;
-
- tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset);
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1);
- else
- tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0);
- WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u16 *r, *g, *b;
- int i;
- u32 tmp;
-
- DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
-
- tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0);
- WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1);
- WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0);
- WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
-
- WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
-
- WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
- WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
- WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
-
- WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
-
- WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
- r = crtc->gamma_store;
- g = r + crtc->gamma_size;
- b = g + crtc->gamma_size;
- for (i = 0; i < 256; i++) {
- WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
- ((*r++ & 0xffc0) << 14) |
- ((*g++ & 0xffc0) << 4) |
- (*b++ >> 6));
- }
-
- tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR2_DEGAMMA_MODE, 0);
- WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0);
- WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0);
- WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0);
- WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- /* XXX match this to the depth of the crtc fmt block, move to modeset? */
- WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0);
- /* XXX this only needs to be programmed once per crtc at startup,
- * not sure where the best place for it is
- */
- tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1);
- WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- if (dig->linkb)
- return 1;
- else
- return 0;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- if (dig->linkb)
- return 3;
- else
- return 2;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- if (dig->linkb)
- return 5;
- else
- return 4;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
- return 6;
- default:
- DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
- return 0;
- }
-}
-
-/**
- * dce_v11_0_pick_pll - Allocate a PPLL for use by the crtc.
- *
- * @crtc: drm crtc
- *
- * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
- * a single PPLL can be used for all DP crtcs/encoders. For non-DP
- * monitors a dedicated PPLL must be used. If a particular board has
- * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
- * as there is no need to program the PLL itself. If we are not able to
- * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
- * avoid messing up an existing monitor.
- *
- * Asic specific PLL information
- *
- * DCE 10.x
- * Tonga
- * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
- * CI
- * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
- *
- */
-static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u32 pll_in_use;
- int pll;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- struct amdgpu_encoder *amdgpu_encoder =
- to_amdgpu_encoder(amdgpu_crtc->encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
- return ATOM_DP_DTO;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL1;
- else
- return ATOM_COMBOPHY_PLL0;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL3;
- else
- return ATOM_COMBOPHY_PLL2;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL5;
- else
- return ATOM_COMBOPHY_PLL4;
- default:
- DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
- return ATOM_PPLL_INVALID;
- }
- }
-
- if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
- if (adev->clock.dp_extclk)
- /* skip PPLL programming if using ext clock */
- return ATOM_PPLL_INVALID;
- else {
- /* use the same PPLL for all DP monitors */
- pll = amdgpu_pll_get_shared_dp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
- } else {
- /* use the same PPLL for all monitors with the same clock */
- pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
-
- /* XXX need to determine what plls are available on each DCE11 part */
- pll_in_use = amdgpu_pll_get_use_mask(crtc);
- if (adev->flags & AMD_IS_APU) {
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL0)))
- return ATOM_PPLL0;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- } else {
- if (!(pll_in_use & (1 << ATOM_PPLL2)))
- return ATOM_PPLL2;
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL0)))
- return ATOM_PPLL0;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- }
- return ATOM_PPLL_INVALID;
-}
-
-static void dce_v11_0_lock_cursor(struct drm_crtc *crtc, bool lock)
-{
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- uint32_t cur_lock;
-
- cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
- if (lock)
- cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1);
- else
- cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0);
- WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
-}
-
-static void dce_v11_0_hide_cursor(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- u32 tmp;
-
- tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
- WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static void dce_v11_0_show_cursor(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- u32 tmp;
-
- WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(amdgpu_crtc->cursor_addr));
- WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(amdgpu_crtc->cursor_addr));
-
- tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
- WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,
- int x, int y)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- int xorigin = 0, yorigin = 0;
-
- amdgpu_crtc->cursor_x = x;
- amdgpu_crtc->cursor_y = y;
-
- /* avivo cursor are offset into the total surface */
- x += crtc->x;
- y += crtc->y;
- DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
-
- if (x < 0) {
- xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
- x = 0;
- }
- if (y < 0) {
- yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
- y = 0;
- }
-
- WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
- WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
- WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
- ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
-
- return 0;
-}
-
-static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc,
- int x, int y)
-{
- int ret;
-
- dce_v11_0_lock_cursor(crtc, true);
- ret = dce_v11_0_cursor_move_locked(crtc, x, y);
- dce_v11_0_lock_cursor(crtc, false);
-
- return ret;
-}
-
-static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
- struct drm_file *file_priv,
- uint32_t handle,
- uint32_t width,
- uint32_t height,
- int32_t hot_x,
- int32_t hot_y)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_gem_object *obj;
- struct amdgpu_bo *aobj;
- int ret;
-
- if (!handle) {
- /* turn off cursor */
- dce_v11_0_hide_cursor(crtc);
- obj = NULL;
- goto unpin;
- }
-
- if ((width > amdgpu_crtc->max_cursor_width) ||
- (height > amdgpu_crtc->max_cursor_height)) {
- DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
- return -EINVAL;
- }
-
- obj = drm_gem_object_lookup(file_priv, handle);
- if (!obj) {
- DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
- return -ENOENT;
- }
-
- aobj = gem_to_amdgpu_bo(obj);
- ret = amdgpu_bo_reserve(aobj, false);
- if (ret != 0) {
- drm_gem_object_put(obj);
- return ret;
- }
-
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_bo_unreserve(aobj);
- if (ret) {
- DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
- drm_gem_object_put(obj);
- return ret;
- }
- amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
-
- dce_v11_0_lock_cursor(crtc, true);
-
- if (width != amdgpu_crtc->cursor_width ||
- height != amdgpu_crtc->cursor_height ||
- hot_x != amdgpu_crtc->cursor_hot_x ||
- hot_y != amdgpu_crtc->cursor_hot_y) {
- int x, y;
-
- x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
- y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
-
- dce_v11_0_cursor_move_locked(crtc, x, y);
-
- amdgpu_crtc->cursor_width = width;
- amdgpu_crtc->cursor_height = height;
- amdgpu_crtc->cursor_hot_x = hot_x;
- amdgpu_crtc->cursor_hot_y = hot_y;
- }
-
- dce_v11_0_show_cursor(crtc);
- dce_v11_0_lock_cursor(crtc, false);
-
-unpin:
- if (amdgpu_crtc->cursor_bo) {
- struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
- ret = amdgpu_bo_reserve(aobj, true);
- if (likely(ret == 0)) {
- amdgpu_bo_unpin(aobj);
- amdgpu_bo_unreserve(aobj);
- }
- drm_gem_object_put(amdgpu_crtc->cursor_bo);
- }
-
- amdgpu_crtc->cursor_bo = obj;
- return 0;
-}
-
-static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- if (amdgpu_crtc->cursor_bo) {
- dce_v11_0_lock_cursor(crtc, true);
-
- dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
- amdgpu_crtc->cursor_y);
-
- dce_v11_0_show_cursor(crtc);
-
- dce_v11_0_lock_cursor(crtc, false);
- }
-}
-
-static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
- u16 *blue, uint32_t size,
- struct drm_modeset_acquire_ctx *ctx)
-{
- dce_v11_0_crtc_load_lut(crtc);
-
- return 0;
-}
-
-static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- drm_crtc_cleanup(crtc);
- kfree(amdgpu_crtc);
-}
-
-static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
- .cursor_set2 = dce_v11_0_crtc_cursor_set2,
- .cursor_move = dce_v11_0_crtc_cursor_move,
- .gamma_set = dce_v11_0_crtc_gamma_set,
- .set_config = amdgpu_display_crtc_set_config,
- .destroy = dce_v11_0_crtc_destroy,
- .page_flip_target = amdgpu_display_crtc_page_flip_target,
- .get_vblank_counter = amdgpu_get_vblank_counter_kms,
- .enable_vblank = amdgpu_enable_vblank_kms,
- .disable_vblank = amdgpu_disable_vblank_kms,
- .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
-};
-
-static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- unsigned type;
-
- switch (mode) {
- case DRM_MODE_DPMS_ON:
- amdgpu_crtc->enabled = true;
- amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
- dce_v11_0_vga_enable(crtc, true);
- amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
- dce_v11_0_vga_enable(crtc, false);
- /* Make sure VBLANK and PFLIP interrupts are still enabled */
- type = amdgpu_display_crtc_idx_to_irq_type(adev,
- amdgpu_crtc->crtc_id);
- amdgpu_irq_update(adev, &adev->crtc_irq, type);
- amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_crtc_vblank_on(crtc);
- dce_v11_0_crtc_load_lut(crtc);
- break;
- case DRM_MODE_DPMS_STANDBY:
- case DRM_MODE_DPMS_SUSPEND:
- case DRM_MODE_DPMS_OFF:
- drm_crtc_vblank_off(crtc);
- if (amdgpu_crtc->enabled) {
- dce_v11_0_vga_enable(crtc, true);
- amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
- dce_v11_0_vga_enable(crtc, false);
- }
- amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
- amdgpu_crtc->enabled = false;
- break;
- }
- /* adjust pm to dpms */
- amdgpu_pm_compute_clocks(adev);
-}
-
-static void dce_v11_0_crtc_prepare(struct drm_crtc *crtc)
-{
- /* disable crtc pair power gating before programming */
- amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
- amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void dce_v11_0_crtc_commit(struct drm_crtc *crtc)
-{
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
- amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
-}
-
-static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_atom_ss ss;
- int i;
-
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
- if (crtc->primary->fb) {
- int r;
- struct amdgpu_bo *abo;
-
- abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r))
- DRM_ERROR("failed to reserve abo before unpin\n");
- else {
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
- }
- /* disable the GRPH */
- dce_v11_0_grph_enable(crtc, false);
-
- amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->mode_info.crtcs[i] &&
- adev->mode_info.crtcs[i]->enabled &&
- i != amdgpu_crtc->crtc_id &&
- amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
- /* one other crtc is using this pll don't turn
- * off the pll
- */
- goto done;
- }
- }
-
- switch (amdgpu_crtc->pll_id) {
- case ATOM_PPLL0:
- case ATOM_PPLL1:
- case ATOM_PPLL2:
- /* disable the ppll */
- amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
- 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
- break;
- case ATOM_COMBOPHY_PLL0:
- case ATOM_COMBOPHY_PLL1:
- case ATOM_COMBOPHY_PLL2:
- case ATOM_COMBOPHY_PLL3:
- case ATOM_COMBOPHY_PLL4:
- case ATOM_COMBOPHY_PLL5:
- /* disable the ppll */
- amdgpu_atombios_crtc_program_pll(crtc, ATOM_CRTC_INVALID, amdgpu_crtc->pll_id,
- 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
- break;
- default:
- break;
- }
-done:
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->adjusted_clock = 0;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
-}
-
-static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode,
- int x, int y, struct drm_framebuffer *old_fb)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- if (!amdgpu_crtc->adjusted_clock)
- return -EINVAL;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- struct amdgpu_encoder *amdgpu_encoder =
- to_amdgpu_encoder(amdgpu_crtc->encoder);
- int encoder_mode =
- amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
-
- /* SetPixelClock calculates the plls and ss values now */
- amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id,
- amdgpu_crtc->pll_id,
- encoder_mode, amdgpu_encoder->encoder_id,
- adjusted_mode->clock, 0, 0, 0, 0,
- amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss);
- } else {
- amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
- }
- amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
- dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
- amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
- amdgpu_atombios_crtc_scaler_setup(crtc);
- dce_v11_0_cursor_reset(crtc);
- /* update the hw version fpr dpm */
- amdgpu_crtc->hw_mode = *adjusted_mode;
-
- return 0;
-}
-
-static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct drm_encoder *encoder;
-
- /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
- list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
- if (encoder->crtc == crtc) {
- amdgpu_crtc->encoder = encoder;
- amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
- break;
- }
- }
- if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- return false;
- }
- if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
- return false;
- if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
- return false;
- /* pick pll */
- amdgpu_crtc->pll_id = dce_v11_0_pick_pll(crtc);
- /* if we can't get a PPLL for a non-DP encoder, fail */
- if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
- !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
- return false;
-
- return true;
-}
-
-static int dce_v11_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
- struct drm_framebuffer *old_fb)
-{
- return dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
-}
-
-static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, enum mode_set_atomic state)
-{
- return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1);
-}
-
-static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = {
- .dpms = dce_v11_0_crtc_dpms,
- .mode_fixup = dce_v11_0_crtc_mode_fixup,
- .mode_set = dce_v11_0_crtc_mode_set,
- .mode_set_base = dce_v11_0_crtc_set_base,
- .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic,
- .prepare = dce_v11_0_crtc_prepare,
- .commit = dce_v11_0_crtc_commit,
- .disable = dce_v11_0_crtc_disable,
- .get_scanout_position = amdgpu_crtc_get_scanout_position,
-};
-
-static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
-{
- struct amdgpu_crtc *amdgpu_crtc;
-
- amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
- (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
- if (amdgpu_crtc == NULL)
- return -ENOMEM;
-
- drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v11_0_crtc_funcs);
-
- drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
- amdgpu_crtc->crtc_id = index;
- adev->mode_info.crtcs[index] = amdgpu_crtc;
-
- amdgpu_crtc->max_cursor_width = 128;
- amdgpu_crtc->max_cursor_height = 128;
- adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
- adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
-
- switch (amdgpu_crtc->crtc_id) {
- case 0:
- default:
- amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET;
- break;
- case 1:
- amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET;
- break;
- case 2:
- amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET;
- break;
- case 3:
- amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET;
- break;
- case 4:
- amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET;
- break;
- case 5:
- amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET;
- break;
- }
-
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->adjusted_clock = 0;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs);
-
- return 0;
-}
-
-static int dce_v11_0_early_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg;
- adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg;
-
- dce_v11_0_set_display_funcs(adev);
-
- adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev);
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 9;
- break;
- case CHIP_STONEY:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 9;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 6;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- adev->mode_info.num_hpd = 5;
- adev->mode_info.num_dig = 5;
- break;
- default:
- /* FIXME: not supported yet */
- return -EINVAL;
- }
-
- dce_v11_0_set_irq_funcs(adev);
-
- return 0;
-}
-
-static int dce_v11_0_sw_init(void *handle)
-{
- int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
- if (r)
- return r;
- }
-
- for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
- if (r)
- return r;
- }
-
- /* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
-
- adev_to_drm(adev)->mode_config.async_page_flip = true;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
- adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
-
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
-
- r = amdgpu_display_modeset_create_props(adev);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
-
- /* allocate crtcs */
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = dce_v11_0_crtc_init(adev, i);
- if (r)
- return r;
- }
-
- if (amdgpu_atombios_get_connector_info_from_object_table(adev))
- amdgpu_display_print_display_setup(adev_to_drm(adev));
- else
- return -EINVAL;
-
- /* setup afmt */
- r = dce_v11_0_afmt_init(adev);
- if (r)
- return r;
-
- r = dce_v11_0_audio_init(adev);
- if (r)
- return r;
-
- drm_kms_helper_poll_init(adev_to_drm(adev));
-
- adev->mode_info.mode_config_initialized = true;
- return 0;
-}
-
-static int dce_v11_0_sw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- kfree(adev->mode_info.bios_hardcoded_edid);
-
- drm_kms_helper_poll_fini(adev_to_drm(adev));
-
- dce_v11_0_audio_fini(adev);
-
- dce_v11_0_afmt_fini(adev);
-
- drm_mode_config_cleanup(adev_to_drm(adev));
- adev->mode_info.mode_config_initialized = false;
-
- return 0;
-}
-
-static int dce_v11_0_hw_init(void *handle)
-{
- int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- dce_v11_0_init_golden_registers(adev);
-
- /* disable vga render */
- dce_v11_0_set_vga_render_state(adev, false);
- /* init dig PHYs, disp eng pll */
- amdgpu_atombios_crtc_powergate_init(adev);
- amdgpu_atombios_encoder_init_dig(adev);
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
- DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
- amdgpu_atombios_crtc_set_dce_clock(adev, 0,
- DCE_CLOCK_TYPE_DPREFCLK, ATOM_GCK_DFS);
- } else {
- amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
- }
-
- /* initialize hpd */
- dce_v11_0_hpd_init(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- dce_v11_0_pageflip_interrupt_init(adev);
-
- return 0;
-}
-
-static int dce_v11_0_hw_fini(void *handle)
-{
- int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- dce_v11_0_hpd_fini(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- dce_v11_0_pageflip_interrupt_fini(adev);
-
- return 0;
-}
-
-static int dce_v11_0_suspend(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->mode_info.bl_level =
- amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
-
- return dce_v11_0_hw_fini(handle);
-}
-
-static int dce_v11_0_resume(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
-
- amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
- adev->mode_info.bl_level);
-
- ret = dce_v11_0_hw_init(handle);
-
- /* turn on the BL */
- if (adev->mode_info.bl_encoder) {
- u8 bl_level = amdgpu_display_backlight_get_level(adev,
- adev->mode_info.bl_encoder);
- amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
- bl_level);
- }
-
- return ret;
-}
-
-static bool dce_v11_0_is_idle(void *handle)
-{
- return true;
-}
-
-static int dce_v11_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_v11_0_soft_reset(void *handle)
-{
- u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (dce_v11_0_is_display_hung(adev))
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
-
- if (srbm_soft_reset) {
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
- return 0;
-}
-
-static void dce_v11_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- u32 lb_interrupt_mask;
-
- if (crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VBLANK_INTERRUPT_MASK, 0);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VBLANK_INTERRUPT_MASK, 1);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- default:
- break;
- }
-}
-
-static void dce_v11_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- u32 lb_interrupt_mask;
-
- if (crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VLINE_INTERRUPT_MASK, 0);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VLINE_INTERRUPT_MASK, 1);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- default:
- break;
- }
-}
-
-static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned hpd,
- enum amdgpu_interrupt_state state)
-{
- u32 tmp;
-
- if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
- return 0;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
- break;
- default:
- break;
- }
-
- return 0;
-}
-
-static int dce_v11_0_set_crtc_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- switch (type) {
- case AMDGPU_CRTC_IRQ_VBLANK1:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 0, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK2:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 1, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK3:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 2, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK4:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 3, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK5:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 4, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK6:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 5, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE1:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 0, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE2:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 1, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE3:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 2, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE4:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 3, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE5:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 4, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE6:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 5, state);
- break;
- default:
- break;
- }
- return 0;
-}
-
-static int dce_v11_0_set_pageflip_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- u32 reg;
-
- if (type >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", type);
- return -EINVAL;
- }
-
- reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
- if (state == AMDGPU_IRQ_STATE_DISABLE)
- WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
- reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
- else
- WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
- reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
-
- return 0;
-}
-
-static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- unsigned long flags;
- unsigned crtc_id;
- struct amdgpu_crtc *amdgpu_crtc;
- struct amdgpu_flip_work *works;
-
- crtc_id = (entry->src_id - 8) >> 1;
- amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
-
- if (crtc_id >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
- return -EINVAL;
- }
-
- if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
- GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
- WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
- GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
-
- /* IRQ could occur when in initial stage */
- if(amdgpu_crtc == NULL)
- return 0;
-
- spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
- DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
- "AMDGPU_FLIP_SUBMITTED(%d)\n",
- amdgpu_crtc->pflip_status,
- AMDGPU_FLIP_SUBMITTED);
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
- return 0;
- }
-
- /* page flip completed. clean up */
- amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
- amdgpu_crtc->pflip_works = NULL;
-
- /* wakeup usersapce */
- if(works->event)
- drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
-
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
-
- drm_crtc_vblank_put(&amdgpu_crtc->base);
- schedule_work(&works->unpin_work);
-
- return 0;
-}
-
-static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev,
- int hpd)
-{
- u32 tmp;
-
- if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
- return;
- }
-
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
-}
-
-static void dce_v11_0_crtc_vblank_int_ack(struct amdgpu_device *adev,
- int crtc)
-{
- u32 tmp;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]);
- tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1);
- WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp);
-}
-
-static void dce_v11_0_crtc_vline_int_ack(struct amdgpu_device *adev,
- int crtc)
-{
- u32 tmp;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]);
- tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1);
- WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp);
-}
-
-static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- unsigned crtc = entry->src_id - 1;
- uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
- unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
- crtc);
-
- switch (entry->src_data[0]) {
- case 0: /* vblank */
- if (disp_int & interrupt_status_offsets[crtc].vblank)
- dce_v11_0_crtc_vblank_int_ack(adev, crtc);
- else
- DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
-
- if (amdgpu_irq_enabled(adev, source, irq_type)) {
- drm_handle_vblank(adev_to_drm(adev), crtc);
- }
- DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
-
- break;
- case 1: /* vline */
- if (disp_int & interrupt_status_offsets[crtc].vline)
- dce_v11_0_crtc_vline_int_ack(adev, crtc);
- else
- DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
-
- DRM_DEBUG("IH: D%d vline\n", crtc + 1);
-
- break;
- default:
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
- break;
- }
-
- return 0;
-}
-
-static int dce_v11_0_hpd_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- uint32_t disp_int, mask;
- unsigned hpd;
-
- if (entry->src_data[0] >= adev->mode_info.num_hpd) {
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
- return 0;
- }
-
- hpd = entry->src_data[0];
- disp_int = RREG32(interrupt_status_offsets[hpd].reg);
- mask = interrupt_status_offsets[hpd].hpd;
-
- if (disp_int & mask) {
- dce_v11_0_hpd_int_ack(adev, hpd);
- schedule_work(&adev->hotplug_work);
- DRM_DEBUG("IH: HPD%d\n", hpd + 1);
- }
-
- return 0;
-}
-
-static int dce_v11_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- return 0;
-}
-
-static int dce_v11_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- return 0;
-}
-
-static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
- .name = "dce_v11_0",
- .early_init = dce_v11_0_early_init,
- .late_init = NULL,
- .sw_init = dce_v11_0_sw_init,
- .sw_fini = dce_v11_0_sw_fini,
- .hw_init = dce_v11_0_hw_init,
- .hw_fini = dce_v11_0_hw_fini,
- .suspend = dce_v11_0_suspend,
- .resume = dce_v11_0_resume,
- .is_idle = dce_v11_0_is_idle,
- .wait_for_idle = dce_v11_0_wait_for_idle,
- .soft_reset = dce_v11_0_soft_reset,
- .set_clockgating_state = dce_v11_0_set_clockgating_state,
- .set_powergating_state = dce_v11_0_set_powergating_state,
-};
-
-static void
-dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-
- amdgpu_encoder->pixel_clock = adjusted_mode->clock;
-
- /* need to call this here rather than in prepare() since we need some crtc info */
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
-
- /* set scaler clears this on some chips */
- dce_v11_0_set_interleave(encoder->crtc, mode);
-
- if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
- dce_v11_0_afmt_enable(encoder, true);
- dce_v11_0_afmt_setmode(encoder, adjusted_mode);
- }
-}
-
-static void dce_v11_0_encoder_prepare(struct drm_encoder *encoder)
-{
- struct amdgpu_device *adev = drm_to_adev(encoder->dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
-
- if ((amdgpu_encoder->active_device &
- (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
- (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
- ENCODER_OBJECT_ID_NONE)) {
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- if (dig) {
- dig->dig_encoder = dce_v11_0_pick_dig_encoder(encoder);
- if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
- dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
- }
- }
-
- amdgpu_atombios_scratch_regs_lock(adev, true);
-
- if (connector) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- /* select the clock/data port if it uses a router */
- if (amdgpu_connector->router.cd_valid)
- amdgpu_i2c_router_select_cd_port(amdgpu_connector);
-
- /* turn eDP panel on for mode set */
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
- amdgpu_atombios_encoder_set_edp_panel_power(connector,
- ATOM_TRANSMITTER_ACTION_POWER_ON);
- }
-
- /* this is needed for the pll/ss setup to work correctly in some cases */
- amdgpu_atombios_encoder_set_crtc_source(encoder);
- /* set up the FMT blocks */
- dce_v11_0_program_fmt(encoder);
-}
-
-static void dce_v11_0_encoder_commit(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- /* need to call this here as we need the crtc set up */
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
- amdgpu_atombios_scratch_regs_lock(adev, false);
-}
-
-static void dce_v11_0_encoder_disable(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig;
-
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
-
- if (amdgpu_atombios_encoder_is_digital(encoder)) {
- if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
- dce_v11_0_afmt_enable(encoder, false);
- dig = amdgpu_encoder->enc_priv;
- dig->dig_encoder = -1;
- }
- amdgpu_encoder->active_device = 0;
-}
-
-/* these are handled by the primary encoders */
-static void dce_v11_0_ext_prepare(struct drm_encoder *encoder)
-{
-
-}
-
-static void dce_v11_0_ext_commit(struct drm_encoder *encoder)
-{
-
-}
-
-static void
-dce_v11_0_ext_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
-
-}
-
-static void dce_v11_0_ext_disable(struct drm_encoder *encoder)
-{
-
-}
-
-static void
-dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode)
-{
-
-}
-
-static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = {
- .dpms = dce_v11_0_ext_dpms,
- .prepare = dce_v11_0_ext_prepare,
- .mode_set = dce_v11_0_ext_mode_set,
- .commit = dce_v11_0_ext_commit,
- .disable = dce_v11_0_ext_disable,
- /* no detect for TMDS/LVDS yet */
-};
-
-static const struct drm_encoder_helper_funcs dce_v11_0_dig_helper_funcs = {
- .dpms = amdgpu_atombios_encoder_dpms,
- .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
- .prepare = dce_v11_0_encoder_prepare,
- .mode_set = dce_v11_0_encoder_mode_set,
- .commit = dce_v11_0_encoder_commit,
- .disable = dce_v11_0_encoder_disable,
- .detect = amdgpu_atombios_encoder_dig_detect,
-};
-
-static const struct drm_encoder_helper_funcs dce_v11_0_dac_helper_funcs = {
- .dpms = amdgpu_atombios_encoder_dpms,
- .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
- .prepare = dce_v11_0_encoder_prepare,
- .mode_set = dce_v11_0_encoder_mode_set,
- .commit = dce_v11_0_encoder_commit,
- .detect = amdgpu_atombios_encoder_dac_detect,
-};
-
-static void dce_v11_0_encoder_destroy(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
- amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
- kfree(amdgpu_encoder->enc_priv);
- drm_encoder_cleanup(encoder);
- kfree(amdgpu_encoder);
-}
-
-static const struct drm_encoder_funcs dce_v11_0_encoder_funcs = {
- .destroy = dce_v11_0_encoder_destroy,
-};
-
-static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
- uint32_t encoder_enum,
- uint32_t supported_device,
- u16 caps)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_encoder *encoder;
- struct amdgpu_encoder *amdgpu_encoder;
-
- /* see if we already added it */
- list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
- amdgpu_encoder = to_amdgpu_encoder(encoder);
- if (amdgpu_encoder->encoder_enum == encoder_enum) {
- amdgpu_encoder->devices |= supported_device;
- return;
- }
-
- }
-
- /* add a new one */
- amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
- if (!amdgpu_encoder)
- return;
-
- encoder = &amdgpu_encoder->base;
- switch (adev->mode_info.num_crtc) {
- case 1:
- encoder->possible_crtcs = 0x1;
- break;
- case 2:
- default:
- encoder->possible_crtcs = 0x3;
- break;
- case 3:
- encoder->possible_crtcs = 0x7;
- break;
- case 4:
- encoder->possible_crtcs = 0xf;
- break;
- case 5:
- encoder->possible_crtcs = 0x1f;
- break;
- case 6:
- encoder->possible_crtcs = 0x3f;
- break;
- }
-
- amdgpu_encoder->enc_priv = NULL;
-
- amdgpu_encoder->encoder_enum = encoder_enum;
- amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
- amdgpu_encoder->devices = supported_device;
- amdgpu_encoder->rmx_type = RMX_OFF;
- amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
- amdgpu_encoder->is_ext_encoder = false;
- amdgpu_encoder->caps = caps;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- drm_encoder_helper_add(encoder, &dce_v11_0_dac_helper_funcs);
- break;
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
- amdgpu_encoder->rmx_type = RMX_FULL;
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
- } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
- } else {
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
- }
- drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs);
- break;
- case ENCODER_OBJECT_ID_SI170B:
- case ENCODER_OBJECT_ID_CH7303:
- case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
- case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
- case ENCODER_OBJECT_ID_TITFP513:
- case ENCODER_OBJECT_ID_VT1623:
- case ENCODER_OBJECT_ID_HDMI_SI1930:
- case ENCODER_OBJECT_ID_TRAVIS:
- case ENCODER_OBJECT_ID_NUTMEG:
- /* these are handled by the primary encoders */
- amdgpu_encoder->is_ext_encoder = true;
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
- else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- else
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
- drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs);
- break;
- }
-}
-
-static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
- .bandwidth_update = &dce_v11_0_bandwidth_update,
- .vblank_get_counter = &dce_v11_0_vblank_get_counter,
- .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
- .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
- .hpd_sense = &dce_v11_0_hpd_sense,
- .hpd_set_polarity = &dce_v11_0_hpd_set_polarity,
- .hpd_get_gpio_reg = &dce_v11_0_hpd_get_gpio_reg,
- .page_flip = &dce_v11_0_page_flip,
- .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos,
- .add_encoder = &dce_v11_0_encoder_add,
- .add_connector = &amdgpu_connector_add,
-};
-
-static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
-{
- adev->mode_info.funcs = &dce_v11_0_display_funcs;
-}
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = {
- .set = dce_v11_0_set_crtc_irq_state,
- .process = dce_v11_0_crtc_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_pageflip_irq_funcs = {
- .set = dce_v11_0_set_pageflip_irq_state,
- .process = dce_v11_0_pageflip_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_hpd_irq_funcs = {
- .set = dce_v11_0_set_hpd_irq_state,
- .process = dce_v11_0_hpd_irq,
-};
-
-static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev)
-{
- if (adev->mode_info.num_crtc > 0)
- adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
- else
- adev->crtc_irq.num_types = 0;
- adev->crtc_irq.funcs = &dce_v11_0_crtc_irq_funcs;
-
- adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
- adev->pageflip_irq.funcs = &dce_v11_0_pageflip_irq_funcs;
-
- adev->hpd_irq.num_types = adev->mode_info.num_hpd;
- adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs;
-}
-
-const struct amdgpu_ip_block_version dce_v11_0_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 11,
- .minor = 0,
- .rev = 0,
- .funcs = &dce_v11_0_ip_funcs,
-};
-
-const struct amdgpu_ip_block_version dce_v11_2_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 11,
- .minor = 2,
- .rev = 0,
- .funcs = &dce_v11_0_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
deleted file mode 100644
index 0d878ca3acba..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __DCE_V11_0_H__
-#define __DCE_V11_0_H__
-
-extern const struct amdgpu_ip_block_version dce_v11_0_ip_block;
-extern const struct amdgpu_ip_block_version dce_v11_2_ip_block;
-
-void dce_v11_0_disable_dce(struct amdgpu_device *adev);
-
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 83a88385b762..acc887a58518 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -23,7 +23,10 @@
#include <linux/pci.h>
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -37,18 +40,25 @@
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
+#include "dce_v6_0.h"
+#include "sid.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
#include "gmc/gmc_6_0_d.h"
#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
#include "dce/dce_6_0_sh_mask.h"
-#include "gca/gfx_7_2_enum.h"
-#include "dce_v6_0.h"
+
#include "si_enums.h"
static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev);
@@ -56,31 +66,31 @@ static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static const u32 crtc_offsets[6] =
{
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET
};
static const u32 hpd_offsets[] =
{
- mmDC_HPD1_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD2_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD3_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD4_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD5_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD6_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
};
static const uint32_t dig_offsets[] = {
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET,
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
(0x13830 - 0x7030) >> 2,
};
@@ -203,9 +213,9 @@ static void dce_v6_0_page_flip(struct amdgpu_device *adev,
/* update the scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
(u32)crtc_base);
-
/* post the write */
RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
}
@@ -215,11 +225,11 @@ static int dce_v6_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
{
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
+
*vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
*position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
return 0;
-
}
/**
@@ -239,7 +249,8 @@ static bool dce_v6_0_hpd_sense(struct amdgpu_device *adev,
if (hpd >= adev->mode_info.num_hpd)
return connected;
- if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) & DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+ if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
connected = true;
return connected;
@@ -270,6 +281,21 @@ static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev,
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
}
+static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
/**
* dce_v6_0_hpd_init - hpd setup callback.
*
@@ -309,6 +335,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
continue;
}
+ dce_v6_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -339,7 +366,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
- WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -351,13 +378,41 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
return mmDC_GPIO_HPD_A;
}
+static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
bool render)
{
if (!render)
WREG32(mmVGA_RENDER_CONTROL,
- RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
-
+ RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK);
}
static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
@@ -400,7 +455,6 @@ void dce_v6_0_disable_dce(struct amdgpu_device *adev)
static void dce_v6_0_program_fmt(struct drm_encoder *encoder)
{
-
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -456,7 +510,7 @@ static void dce_v6_0_program_fmt(struct drm_encoder *encoder)
}
/**
- * cik_get_number_of_dram_channels - get the number of dram channels
+ * si_get_number_of_dram_channels - get the number of dram channels
*
* @adev: amdgpu_device pointer
*
@@ -843,7 +897,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
priority_a_cnt = 0;
priority_b_cnt = 0;
@@ -876,8 +930,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
wm_high.dram_channels = dram_channels;
wm_high.num_heads = num_heads;
- if (adev->pm.dpm_enabled) {
/* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
wm_low.yclk =
amdgpu_dpm_get_mclk(adev, true) * 10;
wm_low.sclk =
@@ -904,9 +958,9 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v6_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v6_0_latency_watermark(&wm_high), 65535);
/* set for low clocks */
- latency_watermark_b = min(dce_v6_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v6_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -957,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
/* select wm A */
arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
tmp = arb_control3;
- tmp &= ~LATENCY_WATERMARK_MASK(3);
- tmp |= LATENCY_WATERMARK_MASK(1);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
(line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
/* select wm B */
tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
- tmp &= ~LATENCY_WATERMARK_MASK(3);
- tmp |= LATENCY_WATERMARK_MASK(2);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
@@ -980,13 +1034,26 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/* watermark setup */
+/**
+ * dce_v6_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ * @other_mode: the display mode of another display controller
+ * that may be sharing the line buffer
+ *
+ * Setup up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
struct amdgpu_crtc *amdgpu_crtc,
struct drm_display_mode *mode,
@@ -1021,7 +1088,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
}
WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset,
- DC_LB_MEMORY_CONFIG(tmp));
+ (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT));
WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
(buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
@@ -1198,7 +1265,7 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1238,6 +1305,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
@@ -1259,6 +1327,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
};
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
@@ -1273,14 +1346,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
return;
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
- u32 tmp = 0;
+ u32 value = 0;
u8 stereo_freqs = 0;
int max_channels = -1;
int j;
@@ -1290,12 +1363,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (sad->format == eld_reg_to_type[i][1]) {
if (sad->channels > max_channels) {
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- MAX_CHANNELS, sad->channels);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- DESCRIPTOR_BYTE_2, sad->byte2);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES, sad->freq);
+ value = (sad->channels <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
+ (sad->byte2 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
+ (sad->freq <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
max_channels = sad->channels;
}
@@ -1306,13 +1379,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
}
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+ value |= (stereo_freqs <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT);
+
+ WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value);
}
kfree(sads);
-
}
static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
@@ -1328,13 +1401,13 @@ static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
static const u32 pin_offsets[7] =
{
- (0x1780 - 0x1780),
- (0x1786 - 0x1780),
- (0x178c - 0x1780),
- (0x1792 - 0x1780),
- (0x1798 - 0x1780),
- (0x179d - 0x1780),
- (0x17a4 - 0x1780),
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
};
static int dce_v6_0_audio_init(struct amdgpu_device *adev)
@@ -1367,6 +1440,8 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev)
adev->mode_info.audio.pin[i].connected = false;
adev->mode_info.audio.pin[i].offset = pin_offsets[i];
adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
@@ -1375,17 +1450,12 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev)
static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1816,11 +1886,10 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_bo *abo;
uint64_t fb_location, tiling_flags;
uint32_t fb_format, fb_pitch_pixels, pipe_config;
- u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE);
+ u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
u32 viewport_w, viewport_h;
int r;
bool bypass_lut = false;
- struct drm_format_name_buf format_name;
/* no fb bound */
if (!atomic && !crtc->primary->fb) {
@@ -1843,6 +1912,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -1856,81 +1926,81 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
switch (target_fb->format->format) {
case DRM_FORMAT_C8:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) |
- GRPH_FORMAT(GRPH_FORMAT_INDEXED));
+ fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
break;
case DRM_FORMAT_XRGB4444:
case DRM_FORMAT_ARGB4444:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB4444));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB1555:
case DRM_FORMAT_ARGB1555:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB1555));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_BGRX5551:
case DRM_FORMAT_BGRA5551:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_BGRA5551));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_RGB565:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB565));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB8888:
case DRM_FORMAT_ARGB8888:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB2101010:
case DRM_FORMAT_ARGB2101010:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB2101010));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_BGRX1010102:
case DRM_FORMAT_BGRA1010102:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_BGRA1010102));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
- fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) |
- GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
default:
- DRM_ERROR("Unsupported screen format %s\n",
- drm_get_format_name(target_fb->format->format, &format_name));
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
return -EINVAL;
}
@@ -1943,18 +2013,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
- fb_format |= GRPH_NUM_BANKS(num_banks);
- fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1);
- fb_format |= GRPH_TILE_SPLIT(tile_split);
- fb_format |= GRPH_BANK_WIDTH(bankw);
- fb_format |= GRPH_BANK_HEIGHT(bankh);
- fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect);
+ fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT);
+ fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT);
+ fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT);
+ fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT);
+ fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT);
} else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
- fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1);
+ fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
}
pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
- fb_format |= GRPH_PIPE_CONFIG(pipe_config);
+ fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT);
dce_v6_0_vga_enable(crtc, false);
@@ -1970,7 +2040,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
(u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
@@ -2038,14 +2108,13 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc,
if (mode->flags & DRM_MODE_FLAG_INTERLACE)
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset,
- INTERLEAVE_EN);
+ DATA_FORMAT__INTERLEAVE_EN_MASK);
else
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
}
static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
{
-
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -2055,15 +2124,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
- (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
+ ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
+ (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
- (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
+ ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
+ (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
@@ -2090,19 +2159,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
}
WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
- (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
- ICON_DEGAMMA_MODE(0) |
- (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
+ ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
- (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
+ ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
+ (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
- (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
+ ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
+ (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
- (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
+ ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
+ (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
/* XXX match this to the depth of the crtc fmt block, move to modeset? */
WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
@@ -2197,8 +2266,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc)
WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
-
-
}
static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
@@ -2215,7 +2282,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
CUR_CONTROL__CURSOR_EN_MASK |
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
-
}
static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc,
@@ -2303,6 +2369,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2425,7 +2492,7 @@ static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
break;
}
/* adjust pm to dpms */
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_dpm_compute_clocks(adev);
}
static void dce_v6_0_crtc_prepare(struct drm_crtc *crtc)
@@ -2525,7 +2592,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
-
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct drm_encoder *encoder;
@@ -2582,6 +2648,32 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v6_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v6_0_panic_flush,
+};
+
static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2609,13 +2701,14 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v6_0_early_init(void *handle)
+static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg;
@@ -2644,11 +2737,10 @@ static int dce_v6_0_early_init(void *handle)
return 0;
}
-static int dce_v6_0_sw_init(void *handle)
+static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- bool ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2675,7 +2767,7 @@ static int dce_v6_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
r = amdgpu_display_modeset_create_props(adev);
if (r)
@@ -2691,8 +2783,7 @@ static int dce_v6_0_sw_init(void *handle)
return r;
}
- ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
- if (ret)
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
amdgpu_display_print_display_setup(adev_to_drm(adev));
else
return -EINVAL;
@@ -2706,16 +2797,28 @@ static int dce_v6_0_sw_init(void *handle)
if (r)
return r;
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ /* Pre-DCE11 */
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
drm_kms_helper_poll_init(adev_to_drm(adev));
return r;
}
-static int dce_v6_0_sw_fini(void *handle)
+static int dce_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2728,10 +2831,10 @@ static int dce_v6_0_sw_fini(void *handle)
return 0;
}
-static int dce_v6_0_hw_init(void *handle)
+static int dce_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* disable vga render */
dce_v6_0_set_vga_render_state(adev, false);
@@ -2751,10 +2854,10 @@ static int dce_v6_0_hw_init(void *handle)
return 0;
}
-static int dce_v6_0_hw_fini(void *handle)
+static int dce_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v6_0_hpd_fini(adev);
@@ -2764,28 +2867,34 @@ static int dce_v6_0_hw_fini(void *handle)
dce_v6_0_pageflip_interrupt_fini(adev);
+ flush_delayed_work(&adev->hotplug_work);
+
return 0;
}
-static int dce_v6_0_suspend(void *handle)
+static int dce_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v6_0_hw_fini(handle);
+ return dce_v6_0_hw_fini(ip_block);
}
-static int dce_v6_0_resume(void *handle)
+static int dce_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v6_0_hw_init(handle);
+ ret = dce_v6_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2794,23 +2903,41 @@ static int dce_v6_0_resume(void *handle)
amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
bl_level);
}
+ if (ret)
+ return ret;
- return ret;
+ return amdgpu_display_resume_helper(adev);
}
-static bool dce_v6_0_is_idle(void *handle)
+static bool dce_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v6_0_wait_for_idle(void *handle)
+static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = ip_block->adev;
-static int dce_v6_0_soft_reset(void *handle)
-{
- DRM_INFO("xxxx: dce_v6_0_soft_reset --- no impl!!\n");
+ if (dce_v6_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
return 0;
}
@@ -2827,22 +2954,22 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
switch (crtc) {
case 0:
- reg_block = SI_CRTC0_REGISTER_OFFSET;
+ reg_block = CRTC0_REGISTER_OFFSET;
break;
case 1:
- reg_block = SI_CRTC1_REGISTER_OFFSET;
+ reg_block = CRTC1_REGISTER_OFFSET;
break;
case 2:
- reg_block = SI_CRTC2_REGISTER_OFFSET;
+ reg_block = CRTC2_REGISTER_OFFSET;
break;
case 3:
- reg_block = SI_CRTC3_REGISTER_OFFSET;
+ reg_block = CRTC3_REGISTER_OFFSET;
break;
case 4:
- reg_block = SI_CRTC4_REGISTER_OFFSET;
+ reg_block = CRTC4_REGISTER_OFFSET;
break;
case 5:
- reg_block = SI_CRTC5_REGISTER_OFFSET;
+ reg_block = CRTC5_REGISTER_OFFSET;
break;
default:
DRM_DEBUG("invalid crtc %d\n", crtc);
@@ -2852,12 +2979,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
interrupt_mask = RREG32(mmINT_MASK + reg_block);
- interrupt_mask &= ~VBLANK_INT_MASK;
+ interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK;
WREG32(mmINT_MASK + reg_block, interrupt_mask);
break;
case AMDGPU_IRQ_STATE_ENABLE:
interrupt_mask = RREG32(mmINT_MASK + reg_block);
- interrupt_mask |= VBLANK_INT_MASK;
+ interrupt_mask |= INT_MASK__VBLANK_INT_MASK;
WREG32(mmINT_MASK + reg_block, interrupt_mask);
break;
default:
@@ -2872,28 +2999,28 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
}
-static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned hpd,
enum amdgpu_interrupt_state state)
{
u32 dc_hpd_int_cntl;
- if (type >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", type);
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
- dc_hpd_int_cntl &= ~DC_HPDx_INT_EN;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
- dc_hpd_int_cntl |= DC_HPDx_INT_EN;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
break;
default:
break;
@@ -2902,7 +3029,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -2962,7 +3089,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
switch (entry->src_data[0]) {
case 0: /* vblank */
if (disp_int & interrupt_status_offsets[crtc].vblank)
- WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK);
+ WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK);
else
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
@@ -2973,7 +3100,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
break;
case 1: /* vline */
if (disp_int & interrupt_status_offsets[crtc].vline)
- WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK);
+ WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK);
else
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
@@ -2987,7 +3114,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3038,7 +3165,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
"AMDGPU_FLIP_SUBMITTED(%d)\n",
amdgpu_crtc->pflip_status,
@@ -3067,7 +3194,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t disp_int, mask, tmp;
+ uint32_t disp_int, mask;
unsigned hpd;
if (entry->src_data[0] >= adev->mode_info.num_hpd) {
@@ -3080,24 +3207,21 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
mask = interrupt_status_offsets[hpd].hpd;
if (disp_int & mask) {
- tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
- tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
- schedule_work(&adev->hotplug_work);
+ dce_v6_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
return 0;
-
}
-static int dce_v6_0_set_clockgating_state(void *handle,
+static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v6_0_set_powergating_state(void *handle,
+static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3106,7 +3230,6 @@ static int dce_v6_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.name = "dce_v6_0",
.early_init = dce_v6_0_early_init,
- .late_init = NULL,
.sw_init = dce_v6_0_sw_init,
.sw_fini = dce_v6_0_sw_fini,
.hw_init = dce_v6_0_hw_init,
@@ -3114,18 +3237,15 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.suspend = dce_v6_0_suspend,
.resume = dce_v6_0_resume,
.is_idle = dce_v6_0_is_idle,
- .wait_for_idle = dce_v6_0_wait_for_idle,
.soft_reset = dce_v6_0_soft_reset,
.set_clockgating_state = dce_v6_0_set_clockgating_state,
.set_powergating_state = dce_v6_0_set_powergating_state,
};
-static void
-dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
+static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
-
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
@@ -3145,7 +3265,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
{
-
struct amdgpu_device *adev = drm_to_adev(encoder->dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
@@ -3185,7 +3304,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
{
-
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -3196,7 +3314,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
{
-
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig;
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
@@ -3223,8 +3340,7 @@ static void dce_v6_0_ext_commit(struct drm_encoder *encoder)
}
-static void
-dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
+static void dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
@@ -3236,8 +3352,7 @@ static void dce_v6_0_ext_disable(struct drm_encoder *encoder)
}
-static void
-dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
+static void dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
{
}
@@ -3308,7 +3423,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
amdgpu_encoder->devices |= supported_device;
return;
}
-
}
/* add a new one */
@@ -3415,17 +3529,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = {
- .set = dce_v6_0_set_crtc_interrupt_state,
+ .set = dce_v6_0_set_crtc_irq_state,
.process = dce_v6_0_crtc_irq,
};
static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = {
- .set = dce_v6_0_set_pageflip_interrupt_state,
+ .set = dce_v6_0_set_pageflip_irq_state,
.process = dce_v6_0_pageflip_irq,
};
static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = {
- .set = dce_v6_0_set_hpd_interrupt_state,
+ .set = dce_v6_0_set_hpd_irq_state,
.process = dce_v6_0_hpd_irq,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 224b30214427..2ccd6aad8dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -21,7 +21,10 @@
*
*/
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -51,8 +54,7 @@
static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev);
-static const u32 crtc_offsets[6] =
-{
+static const u32 crtc_offsets[6] = {
CRTC0_REGISTER_OFFSET,
CRTC1_REGISTER_OFFSET,
CRTC2_REGISTER_OFFSET,
@@ -61,8 +63,7 @@ static const u32 crtc_offsets[6] =
CRTC5_REGISTER_OFFSET
};
-static const u32 hpd_offsets[] =
-{
+static const u32 hpd_offsets[] = {
HPD0_REGISTER_OFFSET,
HPD1_REGISTER_OFFSET,
HPD2_REGISTER_OFFSET,
@@ -264,6 +265,21 @@ static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev,
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
}
+static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
/**
* dce_v8_0_hpd_init - hpd setup callback.
*
@@ -303,6 +319,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
continue;
}
+ dce_v8_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -333,7 +350,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
- WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -975,7 +992,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
@@ -1005,7 +1022,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
wm_high.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v8_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v8_0_latency_watermark(&wm_high), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1044,7 +1061,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for low clocks */
- latency_watermark_b = min(dce_v8_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v8_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1079,8 +1096,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
- amdgpu_crtc->wm_low = latency_watermark_b;
+
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
@@ -1255,7 +1271,7 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1323,7 +1339,7 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -1343,9 +1359,9 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (sad->channels > max_channels) {
value = (sad->channels <<
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
- (sad->byte2 <<
+ (sad->byte2 <<
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
- (sad->freq <<
+ (sad->freq <<
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
max_channels = sad->channels;
}
@@ -1377,15 +1393,14 @@ static void dce_v8_0_audio_enable(struct amdgpu_device *adev,
enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
}
-static const u32 pin_offsets[7] =
-{
- (0x1780 - 0x1780),
- (0x1786 - 0x1780),
- (0x178c - 0x1780),
- (0x1792 - 0x1780),
- (0x1798 - 0x1780),
- (0x179d - 0x1780),
- (0x17a4 - 0x1780),
+static const u32 pin_offsets[7] = {
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
};
static int dce_v8_0_audio_init(struct amdgpu_device *adev)
@@ -1427,17 +1442,12 @@ static int dce_v8_0_audio_init(struct amdgpu_device *adev)
static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1738,8 +1748,7 @@ static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
}
}
-static const u32 vga_control_regs[6] =
-{
+static const u32 vga_control_regs[6] = {
mmD1VGA_CONTROL,
mmD2VGA_CONTROL,
mmD3VGA_CONTROL,
@@ -1791,7 +1800,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
u32 viewport_w, viewport_h;
int r;
bool bypass_lut = false;
- struct drm_format_name_buf format_name;
/* no fb bound */
if (!atomic && !crtc->primary->fb) {
@@ -1814,6 +1822,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -1894,16 +1903,16 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
- (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
- (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
#ifdef __BIG_ENDIAN
fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
default:
- DRM_ERROR("Unsupported screen format %s\n",
- drm_get_format_name(target_fb->format->format, &format_name));
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
return -EINVAL;
}
@@ -2093,22 +2102,18 @@ static int dce_v8_0_pick_dig_encoder(struct drm_encoder *encoder)
return 1;
else
return 0;
- break;
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
if (dig->linkb)
return 3;
else
return 2;
- break;
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
if (dig->linkb)
return 5;
else
return 4;
- break;
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
return 6;
- break;
default:
DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
return 0;
@@ -2310,6 +2315,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2438,7 +2444,7 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
break;
}
/* adjust pm to dpms */
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_dpm_compute_clocks(adev);
}
static void dce_v8_0_crtc_prepare(struct drm_crtc *crtc)
@@ -2601,6 +2607,31 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v8_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+}
+
+static const struct drm_plane_helper_funcs dce_v8_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v8_0_panic_flush,
+};
+
static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2628,13 +2659,14 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v8_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v8_0_early_init(void *handle)
+static int dce_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg;
@@ -2668,10 +2700,10 @@ static int dce_v8_0_early_init(void *handle)
return 0;
}
-static int dce_v8_0_sw_init(void *handle)
+static int dce_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2698,9 +2730,13 @@ static int dce_v8_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
r = amdgpu_display_modeset_create_props(adev);
if (r)
@@ -2730,17 +2766,29 @@ static int dce_v8_0_sw_init(void *handle)
if (r)
return r;
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ /* Pre-DCE11 */
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
drm_kms_helper_poll_init(adev_to_drm(adev));
adev->mode_info.mode_config_initialized = true;
return 0;
}
-static int dce_v8_0_sw_fini(void *handle)
+static int dce_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2754,10 +2802,10 @@ static int dce_v8_0_sw_fini(void *handle)
return 0;
}
-static int dce_v8_0_hw_init(void *handle)
+static int dce_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* disable vga render */
dce_v8_0_set_vga_render_state(adev, false);
@@ -2777,10 +2825,10 @@ static int dce_v8_0_hw_init(void *handle)
return 0;
}
-static int dce_v8_0_hw_fini(void *handle)
+static int dce_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v8_0_hpd_fini(adev);
@@ -2790,28 +2838,35 @@ static int dce_v8_0_hw_fini(void *handle)
dce_v8_0_pageflip_interrupt_fini(adev);
+ flush_delayed_work(&adev->hotplug_work);
+
return 0;
}
-static int dce_v8_0_suspend(void *handle)
+static int dce_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v8_0_hw_fini(handle);
+ return dce_v8_0_hw_fini(ip_block);
}
-static int dce_v8_0_resume(void *handle)
+static int dce_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v8_0_hw_init(handle);
+ ret = dce_v8_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2820,24 +2875,21 @@ static int dce_v8_0_resume(void *handle)
amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
bl_level);
}
+ if (ret)
+ return ret;
- return ret;
+ return amdgpu_display_resume_helper(adev);
}
-static bool dce_v8_0_is_idle(void *handle)
+static bool dce_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v8_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_v8_0_soft_reset(void *handle)
+static int dce_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v8_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -2963,7 +3015,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
}
}
-static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -2971,7 +3023,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
u32 dc_hpd_int_cntl;
if (type >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", type);
+ DRM_DEBUG("invalid hpd %d\n", type);
return 0;
}
@@ -2993,7 +3045,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3078,7 +3130,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3129,7 +3181,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
"AMDGPU_FLIP_SUBMITTED(%d)\n",
amdgpu_crtc->pflip_status,
@@ -3158,7 +3210,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t disp_int, mask, tmp;
+ uint32_t disp_int, mask;
unsigned hpd;
if (entry->src_data[0] >= adev->mode_info.num_hpd) {
@@ -3171,10 +3223,8 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
mask = interrupt_status_offsets[hpd].hpd;
if (disp_int & mask) {
- tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
- tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
- schedule_work(&adev->hotplug_work);
+ dce_v8_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
@@ -3182,13 +3232,13 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
}
-static int dce_v8_0_set_clockgating_state(void *handle,
+static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v8_0_set_powergating_state(void *handle,
+static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3197,7 +3247,6 @@ static int dce_v8_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.name = "dce_v8_0",
.early_init = dce_v8_0_early_init,
- .late_init = NULL,
.sw_init = dce_v8_0_sw_init,
.sw_fini = dce_v8_0_sw_fini,
.hw_init = dce_v8_0_hw_init,
@@ -3205,7 +3254,6 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.suspend = dce_v8_0_suspend,
.resume = dce_v8_0_resume,
.is_idle = dce_v8_0_is_idle,
- .wait_for_idle = dce_v8_0_wait_for_idle,
.soft_reset = dce_v8_0_soft_reset,
.set_clockgating_state = dce_v8_0_set_clockgating_state,
.set_powergating_state = dce_v8_0_set_powergating_state,
@@ -3493,17 +3541,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = {
- .set = dce_v8_0_set_crtc_interrupt_state,
+ .set = dce_v8_0_set_crtc_irq_state,
.process = dce_v8_0_crtc_irq,
};
static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = {
- .set = dce_v8_0_set_pageflip_interrupt_state,
+ .set = dce_v8_0_set_pageflip_irq_state,
.process = dce_v8_0_pageflip_irq,
};
static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = {
- .set = dce_v8_0_set_hpd_interrupt_state,
+ .set = dce_v8_0_set_hpd_irq_state,
.process = dce_v8_0_hpd_irq,
};
@@ -3522,8 +3570,7 @@ static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev)
adev->hpd_irq.funcs = &dce_v8_0_hpd_irq_funcs;
}
-const struct amdgpu_ip_block_version dce_v8_0_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 0,
@@ -3531,8 +3578,7 @@ const struct amdgpu_ip_block_version dce_v8_0_ip_block =
.funcs = &dce_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v8_1_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 1,
@@ -3540,8 +3586,7 @@ const struct amdgpu_ip_block_version dce_v8_1_ip_block =
.funcs = &dce_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v8_2_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_2_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 2,
@@ -3549,8 +3594,7 @@ const struct amdgpu_ip_block_version dce_v8_2_ip_block =
.funcs = &dce_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v8_3_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_3_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 3,
@@ -3558,8 +3602,7 @@ const struct amdgpu_ip_block_version dce_v8_3_ip_block =
.funcs = &dce_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v8_5_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_5_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 5,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
deleted file mode 100644
index 9810af712cc0..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ /dev/null
@@ -1,769 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drm_vblank.h>
-
-#include "amdgpu.h"
-#include "amdgpu_pm.h"
-#include "amdgpu_i2c.h"
-#include "atom.h"
-#include "amdgpu_pll.h"
-#include "amdgpu_connectors.h"
-#ifdef CONFIG_DRM_AMDGPU_SI
-#include "dce_v6_0.h"
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
-#include "dce_v8_0.h"
-#endif
-#include "dce_v10_0.h"
-#include "dce_v11_0.h"
-#include "dce_virtual.h"
-#include "ivsrcid/ivsrcid_vislands30.h"
-
-#define DCE_VIRTUAL_VBLANK_PERIOD 16666666
-
-
-static void dce_virtual_set_display_funcs(struct amdgpu_device *adev);
-static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev);
-static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
- int index);
-static int dce_virtual_pageflip(struct amdgpu_device *adev,
- unsigned crtc_id);
-static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer);
-static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state);
-
-static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc)
-{
- return 0;
-}
-
-static void dce_virtual_page_flip(struct amdgpu_device *adev,
- int crtc_id, u64 crtc_base, bool async)
-{
- return;
-}
-
-static int dce_virtual_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
- u32 *vbl, u32 *position)
-{
- *vbl = 0;
- *position = 0;
-
- return -EINVAL;
-}
-
-static bool dce_virtual_hpd_sense(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- return true;
-}
-
-static void dce_virtual_hpd_set_polarity(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- return;
-}
-
-static u32 dce_virtual_hpd_get_gpio_reg(struct amdgpu_device *adev)
-{
- return 0;
-}
-
-/**
- * dce_virtual_bandwidth_update - program display watermarks
- *
- * @adev: amdgpu_device pointer
- *
- * Calculate and program the display watermarks and line
- * buffer allocation (CIK).
- */
-static void dce_virtual_bandwidth_update(struct amdgpu_device *adev)
-{
- return;
-}
-
-static int dce_virtual_crtc_gamma_set(struct drm_crtc *crtc, u16 *red,
- u16 *green, u16 *blue, uint32_t size,
- struct drm_modeset_acquire_ctx *ctx)
-{
- return 0;
-}
-
-static void dce_virtual_crtc_destroy(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- drm_crtc_cleanup(crtc);
- kfree(amdgpu_crtc);
-}
-
-static const struct drm_crtc_funcs dce_virtual_crtc_funcs = {
- .cursor_set2 = NULL,
- .cursor_move = NULL,
- .gamma_set = dce_virtual_crtc_gamma_set,
- .set_config = amdgpu_display_crtc_set_config,
- .destroy = dce_virtual_crtc_destroy,
- .page_flip_target = amdgpu_display_crtc_page_flip_target,
- .get_vblank_counter = amdgpu_get_vblank_counter_kms,
- .enable_vblank = amdgpu_enable_vblank_kms,
- .disable_vblank = amdgpu_disable_vblank_kms,
- .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
-};
-
-static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- unsigned type;
-
- switch (mode) {
- case DRM_MODE_DPMS_ON:
- amdgpu_crtc->enabled = true;
- /* Make sure VBLANK interrupts are still enabled */
- type = amdgpu_display_crtc_idx_to_irq_type(adev,
- amdgpu_crtc->crtc_id);
- amdgpu_irq_update(adev, &adev->crtc_irq, type);
- drm_crtc_vblank_on(crtc);
- break;
- case DRM_MODE_DPMS_STANDBY:
- case DRM_MODE_DPMS_SUSPEND:
- case DRM_MODE_DPMS_OFF:
- drm_crtc_vblank_off(crtc);
- amdgpu_crtc->enabled = false;
- break;
- }
-}
-
-
-static void dce_virtual_crtc_prepare(struct drm_crtc *crtc)
-{
- dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void dce_virtual_crtc_commit(struct drm_crtc *crtc)
-{
- dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
-}
-
-static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
-
- if (dev->num_crtcs)
- drm_crtc_vblank_off(crtc);
-
- amdgpu_crtc->enabled = false;
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
-}
-
-static int dce_virtual_crtc_mode_set(struct drm_crtc *crtc,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode,
- int x, int y, struct drm_framebuffer *old_fb)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- /* update the hw version fpr dpm */
- amdgpu_crtc->hw_mode = *adjusted_mode;
-
- return 0;
-}
-
-static bool dce_virtual_crtc_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- return true;
-}
-
-
-static int dce_virtual_crtc_set_base(struct drm_crtc *crtc, int x, int y,
- struct drm_framebuffer *old_fb)
-{
- return 0;
-}
-
-static int dce_virtual_crtc_set_base_atomic(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, enum mode_set_atomic state)
-{
- return 0;
-}
-
-static const struct drm_crtc_helper_funcs dce_virtual_crtc_helper_funcs = {
- .dpms = dce_virtual_crtc_dpms,
- .mode_fixup = dce_virtual_crtc_mode_fixup,
- .mode_set = dce_virtual_crtc_mode_set,
- .mode_set_base = dce_virtual_crtc_set_base,
- .mode_set_base_atomic = dce_virtual_crtc_set_base_atomic,
- .prepare = dce_virtual_crtc_prepare,
- .commit = dce_virtual_crtc_commit,
- .disable = dce_virtual_crtc_disable,
- .get_scanout_position = amdgpu_crtc_get_scanout_position,
-};
-
-static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index)
-{
- struct amdgpu_crtc *amdgpu_crtc;
-
- amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
- (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
- if (amdgpu_crtc == NULL)
- return -ENOMEM;
-
- drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_virtual_crtc_funcs);
-
- drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
- amdgpu_crtc->crtc_id = index;
- adev->mode_info.crtcs[index] = amdgpu_crtc;
-
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
- drm_crtc_helper_add(&amdgpu_crtc->base, &dce_virtual_crtc_helper_funcs);
-
- hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hrtimer_set_expires(&amdgpu_crtc->vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD);
- amdgpu_crtc->vblank_timer.function = dce_virtual_vblank_timer_handle;
- hrtimer_start(&amdgpu_crtc->vblank_timer,
- DCE_VIRTUAL_VBLANK_PERIOD, HRTIMER_MODE_REL);
- return 0;
-}
-
-static int dce_virtual_early_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- dce_virtual_set_display_funcs(adev);
- dce_virtual_set_irq_funcs(adev);
-
- adev->mode_info.num_hpd = 1;
- adev->mode_info.num_dig = 1;
- return 0;
-}
-
-static struct drm_encoder *
-dce_virtual_encoder(struct drm_connector *connector)
-{
- struct drm_encoder *encoder;
-
- drm_connector_for_each_possible_encoder(connector, encoder) {
- if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL)
- return encoder;
- }
-
- /* pick the first one */
- drm_connector_for_each_possible_encoder(connector, encoder)
- return encoder;
-
- return NULL;
-}
-
-static int dce_virtual_get_modes(struct drm_connector *connector)
-{
- struct drm_device *dev = connector->dev;
- struct drm_display_mode *mode = NULL;
- unsigned i;
- static const struct mode_size {
- int w;
- int h;
- } common_modes[] = {
- { 640, 480},
- { 720, 480},
- { 800, 600},
- { 848, 480},
- {1024, 768},
- {1152, 768},
- {1280, 720},
- {1280, 800},
- {1280, 854},
- {1280, 960},
- {1280, 1024},
- {1440, 900},
- {1400, 1050},
- {1680, 1050},
- {1600, 1200},
- {1920, 1080},
- {1920, 1200},
- {2560, 1440},
- {4096, 3112},
- {3656, 2664},
- {3840, 2160},
- {4096, 2160},
- };
-
- for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
- mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
- drm_mode_probed_add(connector, mode);
- }
-
- return 0;
-}
-
-static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
-{
- return MODE_OK;
-}
-
-static int
-dce_virtual_dpms(struct drm_connector *connector, int mode)
-{
- return 0;
-}
-
-static int
-dce_virtual_set_property(struct drm_connector *connector,
- struct drm_property *property,
- uint64_t val)
-{
- return 0;
-}
-
-static void dce_virtual_destroy(struct drm_connector *connector)
-{
- drm_connector_unregister(connector);
- drm_connector_cleanup(connector);
- kfree(connector);
-}
-
-static void dce_virtual_force(struct drm_connector *connector)
-{
- return;
-}
-
-static const struct drm_connector_helper_funcs dce_virtual_connector_helper_funcs = {
- .get_modes = dce_virtual_get_modes,
- .mode_valid = dce_virtual_mode_valid,
- .best_encoder = dce_virtual_encoder,
-};
-
-static const struct drm_connector_funcs dce_virtual_connector_funcs = {
- .dpms = dce_virtual_dpms,
- .fill_modes = drm_helper_probe_single_connector_modes,
- .set_property = dce_virtual_set_property,
- .destroy = dce_virtual_destroy,
- .force = dce_virtual_force,
-};
-
-static int dce_virtual_sw_init(void *handle)
-{
- int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq);
- if (r)
- return r;
-
- adev_to_drm(adev)->max_vblank_count = 0;
-
- adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
- adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
-
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
-
- r = amdgpu_display_modeset_create_props(adev);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
- /* allocate crtcs, encoders, connectors */
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = dce_virtual_crtc_init(adev, i);
- if (r)
- return r;
- r = dce_virtual_connector_encoder_init(adev, i);
- if (r)
- return r;
- }
-
- drm_kms_helper_poll_init(adev_to_drm(adev));
-
- adev->mode_info.mode_config_initialized = true;
- return 0;
-}
-
-static int dce_virtual_sw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- kfree(adev->mode_info.bios_hardcoded_edid);
-
- drm_kms_helper_poll_fini(adev_to_drm(adev));
-
- drm_mode_config_cleanup(adev_to_drm(adev));
- /* clear crtcs pointer to avoid dce irq finish routine access freed data */
- memset(adev->mode_info.crtcs, 0, sizeof(adev->mode_info.crtcs[0]) * AMDGPU_MAX_CRTCS);
- adev->mode_info.mode_config_initialized = false;
- return 0;
-}
-
-static int dce_virtual_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_SI
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- dce_v6_0_disable_dce(adev);
- break;
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KAVERI:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- dce_v8_0_disable_dce(adev);
- break;
-#endif
- case CHIP_FIJI:
- case CHIP_TONGA:
- dce_v10_0_disable_dce(adev);
- break;
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_VEGAM:
- dce_v11_0_disable_dce(adev);
- break;
- case CHIP_TOPAZ:
-#ifdef CONFIG_DRM_AMDGPU_SI
- case CHIP_HAINAN:
-#endif
- /* no DCE */
- break;
- default:
- break;
- }
- return 0;
-}
-
-static int dce_virtual_hw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i = 0;
-
- for (i = 0; i<adev->mode_info.num_crtc; i++)
- if (adev->mode_info.crtcs[i])
- hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
-
- return 0;
-}
-
-static int dce_virtual_suspend(void *handle)
-{
- return dce_virtual_hw_fini(handle);
-}
-
-static int dce_virtual_resume(void *handle)
-{
- return dce_virtual_hw_init(handle);
-}
-
-static bool dce_virtual_is_idle(void *handle)
-{
- return true;
-}
-
-static int dce_virtual_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_virtual_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int dce_virtual_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- return 0;
-}
-
-static int dce_virtual_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- return 0;
-}
-
-static const struct amd_ip_funcs dce_virtual_ip_funcs = {
- .name = "dce_virtual",
- .early_init = dce_virtual_early_init,
- .late_init = NULL,
- .sw_init = dce_virtual_sw_init,
- .sw_fini = dce_virtual_sw_fini,
- .hw_init = dce_virtual_hw_init,
- .hw_fini = dce_virtual_hw_fini,
- .suspend = dce_virtual_suspend,
- .resume = dce_virtual_resume,
- .is_idle = dce_virtual_is_idle,
- .wait_for_idle = dce_virtual_wait_for_idle,
- .soft_reset = dce_virtual_soft_reset,
- .set_clockgating_state = dce_virtual_set_clockgating_state,
- .set_powergating_state = dce_virtual_set_powergating_state,
-};
-
-/* these are handled by the primary encoders */
-static void dce_virtual_encoder_prepare(struct drm_encoder *encoder)
-{
- return;
-}
-
-static void dce_virtual_encoder_commit(struct drm_encoder *encoder)
-{
- return;
-}
-
-static void
-dce_virtual_encoder_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- return;
-}
-
-static void dce_virtual_encoder_disable(struct drm_encoder *encoder)
-{
- return;
-}
-
-static void
-dce_virtual_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
- return;
-}
-
-static bool dce_virtual_encoder_mode_fixup(struct drm_encoder *encoder,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- return true;
-}
-
-static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs = {
- .dpms = dce_virtual_encoder_dpms,
- .mode_fixup = dce_virtual_encoder_mode_fixup,
- .prepare = dce_virtual_encoder_prepare,
- .mode_set = dce_virtual_encoder_mode_set,
- .commit = dce_virtual_encoder_commit,
- .disable = dce_virtual_encoder_disable,
-};
-
-static void dce_virtual_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
- kfree(encoder);
-}
-
-static const struct drm_encoder_funcs dce_virtual_encoder_funcs = {
- .destroy = dce_virtual_encoder_destroy,
-};
-
-static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
- int index)
-{
- struct drm_encoder *encoder;
- struct drm_connector *connector;
-
- /* add a new encoder */
- encoder = kzalloc(sizeof(struct drm_encoder), GFP_KERNEL);
- if (!encoder)
- return -ENOMEM;
- encoder->possible_crtcs = 1 << index;
- drm_encoder_init(adev_to_drm(adev), encoder, &dce_virtual_encoder_funcs,
- DRM_MODE_ENCODER_VIRTUAL, NULL);
- drm_encoder_helper_add(encoder, &dce_virtual_encoder_helper_funcs);
-
- connector = kzalloc(sizeof(struct drm_connector), GFP_KERNEL);
- if (!connector) {
- kfree(encoder);
- return -ENOMEM;
- }
-
- /* add a new connector */
- drm_connector_init(adev_to_drm(adev), connector, &dce_virtual_connector_funcs,
- DRM_MODE_CONNECTOR_VIRTUAL);
- drm_connector_helper_add(connector, &dce_virtual_connector_helper_funcs);
- connector->display_info.subpixel_order = SubPixelHorizontalRGB;
- connector->interlace_allowed = false;
- connector->doublescan_allowed = false;
-
- /* link them */
- drm_connector_attach_encoder(connector, encoder);
-
- return 0;
-}
-
-static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
- .bandwidth_update = &dce_virtual_bandwidth_update,
- .vblank_get_counter = &dce_virtual_vblank_get_counter,
- .backlight_set_level = NULL,
- .backlight_get_level = NULL,
- .hpd_sense = &dce_virtual_hpd_sense,
- .hpd_set_polarity = &dce_virtual_hpd_set_polarity,
- .hpd_get_gpio_reg = &dce_virtual_hpd_get_gpio_reg,
- .page_flip = &dce_virtual_page_flip,
- .page_flip_get_scanoutpos = &dce_virtual_crtc_get_scanoutpos,
- .add_encoder = NULL,
- .add_connector = NULL,
-};
-
-static void dce_virtual_set_display_funcs(struct amdgpu_device *adev)
-{
- adev->mode_info.funcs = &dce_virtual_display_funcs;
-}
-
-static int dce_virtual_pageflip(struct amdgpu_device *adev,
- unsigned crtc_id)
-{
- unsigned long flags;
- struct amdgpu_crtc *amdgpu_crtc;
- struct amdgpu_flip_work *works;
-
- amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
-
- if (crtc_id >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
- return -EINVAL;
- }
-
- /* IRQ could occur when in initial stage */
- if (amdgpu_crtc == NULL)
- return 0;
-
- spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
- DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
- "AMDGPU_FLIP_SUBMITTED(%d)\n",
- amdgpu_crtc->pflip_status,
- AMDGPU_FLIP_SUBMITTED);
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
- return 0;
- }
-
- /* page flip completed. clean up */
- amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
- amdgpu_crtc->pflip_works = NULL;
-
- /* wakeup usersapce */
- if (works->event)
- drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
-
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
-
- drm_crtc_vblank_put(&amdgpu_crtc->base);
- amdgpu_bo_unref(&works->old_abo);
- kfree(works->shared);
- kfree(works);
-
- return 0;
-}
-
-static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer)
-{
- struct amdgpu_crtc *amdgpu_crtc = container_of(vblank_timer,
- struct amdgpu_crtc, vblank_timer);
- struct drm_device *ddev = amdgpu_crtc->base.dev;
- struct amdgpu_device *adev = drm_to_adev(ddev);
- struct amdgpu_irq_src *source = adev->irq.client[AMDGPU_IRQ_CLIENTID_LEGACY].sources
- [VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER];
- int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
- amdgpu_crtc->crtc_id);
-
- if (amdgpu_irq_enabled(adev, source, irq_type)) {
- drm_handle_vblank(ddev, amdgpu_crtc->crtc_id);
- dce_virtual_pageflip(adev, amdgpu_crtc->crtc_id);
- }
- hrtimer_start(vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD,
- HRTIMER_MODE_REL);
-
- return HRTIMER_NORESTART;
-}
-
-static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- if (crtc >= adev->mode_info.num_crtc || !adev->mode_info.crtcs[crtc]) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- adev->mode_info.crtcs[crtc]->vsync_timer_enabled = state;
- DRM_DEBUG("[FM]set crtc %d vblank interrupt state %d\n", crtc, state);
-}
-
-
-static int dce_virtual_set_crtc_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- if (type > AMDGPU_CRTC_IRQ_VBLANK6)
- return -EINVAL;
-
- dce_virtual_set_crtc_vblank_interrupt_state(adev, type, state);
-
- return 0;
-}
-
-static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = {
- .set = dce_virtual_set_crtc_irq_state,
- .process = NULL,
-};
-
-static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev)
-{
- adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VBLANK6 + 1;
- adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs;
-}
-
-const struct amdgpu_ip_block_version dce_virtual_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 1,
- .minor = 0,
- .rev = 0,
- .funcs = &dce_virtual_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h b/drivers/gpu/drm/amd/amdgpu/dce_virtual.h
deleted file mode 100644
index ed422012c8c6..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __DCE_VIRTUAL_H__
-#define __DCE_VIRTUAL_H__
-
-extern const struct amdgpu_ip_block_version dce_virtual_ip_block;
-
-#endif
-
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 2d01ac0d4c11..cd298556f7a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -70,6 +70,8 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
int fb_channel_number;
fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
+ if (fb_channel_number >= ARRAY_SIZE(df_v1_7_channel_number))
+ fb_channel_number = 0;
return df_v1_7_channel_number[fb_channel_number];
}
@@ -94,12 +96,12 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
}
- /* Exit boradcast mode */
+ /* Exit broadcast mode */
adev->df.funcs->enable_broadcast_mode(adev, false);
}
static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
u32 tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 6b4b30a8dce5..621aeca53880 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -205,7 +205,7 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
count++;
}
- return snprintf(buf, PAGE_SIZE, "%i\n", count);
+ return sysfs_emit(buf, "%i\n", count);
}
/* device attr for available perfmon counters */
@@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device *adev)
adev->df.hash_status.hash_2m = false;
adev->df.hash_status.hash_1g = false;
- if (adev->asic_type != CHIP_ARCTURUS)
- return;
-
- /* encoding for hash-enabled on Arcturus */
- if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
+ /* encoding for hash-enabled on Arcturus and Aldebaran */
+ if ((adev->asic_type == CHIP_ARCTURUS &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
+ (adev->asic_type == CHIP_ALDEBARAN &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
DF_CS_UMC_AON0_DfGlobalCtrl,
@@ -254,8 +254,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
static void df_v3_6_sw_fini(struct amdgpu_device *adev)
{
-
- device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
}
@@ -277,8 +277,14 @@ static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
{
u32 tmp;
- tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
- tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ if (adev->asic_type == CHIP_ALDEBARAN) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_GCM_AON0_DramMegaBaseAddress0);
+ tmp &=
+ ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
+ tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ }
tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
return tmp;
@@ -326,7 +332,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
}
static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
u32 tmp;
@@ -452,7 +458,7 @@ static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
#define DEFERRED_ARM_MASK (1 << 31)
static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
- int counter_idx, uint64_t config,
+ uint64_t config, int counter_idx,
bool is_deferred)
{
@@ -470,8 +476,8 @@ static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
}
static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
- int counter_idx,
- uint64_t config)
+ uint64_t config,
+ int counter_idx)
{
return (df_v3_6_pmc_has_counter(adev, config, counter_idx) &&
(adev->df_perfmon_config_assign_mask[counter_idx]
@@ -568,6 +574,8 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
if (ret)
return ret;
+ df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
+ hi_base_addr, hi_val);
if (is_remove) {
df_v3_6_reset_perfmon_cntr(adev, config, counter_idx);
@@ -629,6 +637,36 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
}
}
+static bool df_v3_6_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t hw_assert_msklo, hw_assert_mskhi;
+ uint32_t v0, v1, v28, v31;
+
+ hw_assert_msklo = RREG32_SOC15(DF, 0,
+ mmDF_CS_UMC_AON0_HardwareAssertMaskLow);
+ hw_assert_mskhi = RREG32_SOC15(DF, 0,
+ mmDF_NCS_PG0_HardwareAssertMaskHigh);
+
+ v0 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk0);
+ v1 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk1);
+ v28 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk28);
+ v31 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk31);
+
+ if (v0 && v1 && v28 && v31)
+ return true;
+ else if (!v0 && !v1 && !v28 && !v31)
+ return false;
+ else {
+ dev_warn(adev->dev, "DF poison setting is inconsistent(%d:%d:%d:%d)!\n",
+ v0, v1, v28, v31);
+ return false;
+ }
+}
+
const struct amdgpu_df_funcs df_v3_6_funcs = {
.sw_init = df_v3_6_sw_init,
.sw_fini = df_v3_6_sw_fini,
@@ -643,4 +681,5 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
.pmc_get_count = df_v3_6_pmc_get_count,
.get_fica = df_v3_6_get_fica,
.set_fica = df_v3_6_set_fica,
+ .query_ras_poison_mode = df_v3_6_query_ras_poison_mode,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
new file mode 100644
index 000000000000..2a573e33908b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_15.h"
+
+#include "df/df_4_15_offset.h"
+#include "df/df_4_15_sh_mask.h"
+
+static void df_v4_15_hw_init(struct amdgpu_device *adev)
+{
+ if (adev->have_atomics_support) {
+ uint32_t tmp;
+ uint32_t dis_lcl_proc = (1 << 1 |
+ 1 << 2 |
+ 1 << 13);
+
+ tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1);
+ tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT);
+ WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp);
+ }
+}
+
+const struct amdgpu_df_funcs df_v4_15_funcs = {
+ .hw_init = df_v4_15_hw_init
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h
new file mode 100644
index 000000000000..dddf2422112a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_15_H__
+#define __DF_V4_15_H__
+
+extern const struct amdgpu_df_funcs df_v4_15_funcs;
+
+#endif /* __DF_V4_15_H__ */
+
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_3.c b/drivers/gpu/drm/amd/amdgpu/df_v4_3.c
new file mode 100644
index 000000000000..e8b9e19ede2e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_3.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_3.h"
+
+#include "df/df_4_3_offset.h"
+#include "df/df_4_3_sh_mask.h"
+
+static bool df_v4_3_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t hw_assert_msklo, hw_assert_mskhi;
+ uint32_t v0, v1, v28, v31;
+
+ hw_assert_msklo = RREG32_SOC15(DF, 0,
+ regDF_CS_UMC_AON0_HardwareAssertMaskLow);
+ hw_assert_mskhi = RREG32_SOC15(DF, 0,
+ regDF_NCS_PG0_HardwareAssertMaskHigh);
+
+ v0 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk0);
+ v1 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk1);
+ v28 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk28);
+ v31 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk31);
+
+ if (v0 && v1 && v28 && v31)
+ return true;
+ else if (!v0 && !v1 && !v28 && !v31)
+ return false;
+ else {
+ dev_warn(adev->dev, "DF poison setting is inconsistent(%d:%d:%d:%d)!\n",
+ v0, v1, v28, v31);
+ return false;
+ }
+}
+
+const struct amdgpu_df_funcs df_v4_3_funcs = {
+ .query_ras_poison_mode = df_v4_3_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_3.h b/drivers/gpu/drm/amd/amdgpu/df_v4_3.h
new file mode 100644
index 000000000000..06ef0724edd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_3_H__
+#define __DF_V4_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_df_funcs df_v4_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
new file mode 100644
index 000000000000..a47960a0babd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_6_2.h"
+
+static bool df_v4_6_2_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /* return true since related regs are inaccessible */
+ return true;
+}
+
+const struct amdgpu_df_funcs df_v4_6_2_funcs = {
+ .query_ras_poison_mode = df_v4_6_2_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
new file mode 100644
index 000000000000..3bc3e6d216e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_6_2_H__
+#define __DF_V4_6_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_df_funcs df_v4_6_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c
index e9f177e9e3cf..e9f177e9e3cf 100755..100644
--- a/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 45d1172b7bff..d75b9940f248 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -29,7 +29,6 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
-#include "amdgpu_smu.h"
#include "nv.h"
#include "nvd.h"
@@ -41,20 +40,20 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
-#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"
-/**
+/*
* Navi10 has two graphic rings to share each graphic pipe.
* 1. Primary ring
* 2. Async ring
*/
#define GFX10_NUM_GFX_RINGS_NV1X 1
-#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@@ -103,10 +102,21 @@
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish 0x0105
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish 0x0106
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish_BASE_IDX 1
+
#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1
#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1
+
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6 0x002d
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6 0x002e
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6_BASE_IDX 1
+
#define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441
#define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1
#define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261
@@ -174,6 +184,9 @@
#define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030
#define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0
+#define mmRLC_SPARE_INT_0_Sienna_Cichlid 0x4ca5
+#define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX 1
+
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -228,8 +241,246 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin");
MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin");
MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin");
-static const struct soc15_reg_golden golden_settings_gc_10_1[] =
-{
+MODULE_FIRMWARE("amdgpu/beige_goby_ce.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_pfp.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_me.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec2.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/yellow_carp_ce.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_pfp.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_me.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
+ /* gfx header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
@@ -272,13 +523,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -1333,8 +1582,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
@@ -1375,8 +1623,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
@@ -1391,9 +1638,10 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
@@ -1411,55 +1659,20 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000)
};
-static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
-{
- static void *scratch_reg0;
- static void *scratch_reg1;
- static void *spare_int;
- uint32_t i = 0;
- uint32_t retries = 50000;
-
- scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
- scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
- spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
-
- if (amdgpu_sriov_runtime(adev)) {
- pr_err("shouldn't call rlcg write register during runtime\n");
- return;
- }
-
- writel(v, scratch_reg0);
- writel(offset | 0x80000000, scratch_reg1);
- writel(1, spare_int);
- for (i = 0; i < retries; i++) {
- u32 tmp;
-
- tmp = readl(scratch_reg1);
- if (!(tmp & 0x80000000))
- break;
-
- udelay(10);
- }
-
- if (i >= retries)
- pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
-}
-
-static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000L, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -2082,13 +2295,11 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000L, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -3143,8 +3354,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_3[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
@@ -3153,7 +3363,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
- SOC15_REG_GOLDEN_VALUE(GC, 0 ,mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080),
@@ -3185,17 +3395,16 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
@@ -3242,8 +3451,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
@@ -3264,6 +3472,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
@@ -3272,15 +3481,37 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_4[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000280, 0x00000280),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07800000, 0x00800000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00001d00, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x00001d00, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003c0000, 0x00280400),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
@@ -3306,27 +3537,153 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020)
};
+static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xb0000ff0, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff000000, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_FAST_CLKS, 0x3fffffff, 0x0000493e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x3c000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0xa0000000, 0xa0000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x00008000, 0x003c8014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_DRAM_BURST_CTRL, 0x00000010, 0x00000017),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xd8d8d8d8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00009d00, 0x00008500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xffffffff, 0x000fffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_DRAM_BURST_CTRL, 0x00000010, 0x00000017),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xfcfcfcfc, 0xd8d8d8d8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77707770, 0x21302130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77707770, 0x21302130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xfc02002f, 0x9402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00002188, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x08000009, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xcc3fcc03, 0x842a4c02),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000000f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffff3109, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x00030008, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf000003f, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
(SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+/* TODO: pending on golden setting value of gb address config */
+#define CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN 0x00100044
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev);
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance);
+ u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
@@ -3339,16 +3696,29 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev);
-
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid);
+
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -3356,10 +3726,23 @@ static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue
static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring)
{
- struct amdgpu_device *adev = kiq_ring->adev;
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
@@ -3432,12 +3815,59 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub)
{
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(kiq_ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+ gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+ uint32_t tmp;
+
+ /* enter save mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
@@ -3446,6 +3876,7 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -3455,23 +3886,23 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
- adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
+ adev->gfx.kiq[0].pmf = &gfx_v10_0_kiq_pm4_funcs;
}
static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_NAVI10:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
golden_settings_gc_rlc_spm_10_0_nv10,
(const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_0_nv10));
break;
- case CHIP_NAVI14:
+ case IP_VERSION(10, 1, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_rlc_spm_10_1_nv14,
(const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_nv14));
break;
- case CHIP_NAVI12:
+ case IP_VERSION(10, 1, 2):
soc15_program_register_sequence(adev,
golden_settings_gc_rlc_spm_10_1_2_nv12,
(const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_2_nv12));
@@ -3483,8 +3914,11 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_NAVI10:
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
golden_settings_gc_10_1,
(const u32)ARRAY_SIZE(golden_settings_gc_10_1));
@@ -3492,7 +3926,7 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_10_0_nv10,
(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
break;
- case CHIP_NAVI14:
+ case IP_VERSION(10, 1, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_10_1_1,
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
@@ -3500,7 +3934,7 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_10_1_nv14,
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
break;
- case CHIP_NAVI12:
+ case IP_VERSION(10, 1, 2):
soc15_program_register_sequence(adev,
golden_settings_gc_10_1_2,
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2));
@@ -3508,7 +3942,7 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_10_1_2_nv12,
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
break;
- case CHIP_SIENNA_CICHLID:
+ case IP_VERSION(10, 3, 0):
soc15_program_register_sequence(adev,
golden_settings_gc_10_3,
(const u32)ARRAY_SIZE(golden_settings_gc_10_3));
@@ -3516,20 +3950,46 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_10_3_sienna_cichlid,
(const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid));
break;
- case CHIP_NAVY_FLOUNDER:
+ case IP_VERSION(10, 3, 2):
soc15_program_register_sequence(adev,
golden_settings_gc_10_3_2,
(const u32)ARRAY_SIZE(golden_settings_gc_10_3_2));
break;
- case CHIP_VANGOGH:
+ case IP_VERSION(10, 3, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_10_3_vangogh,
(const u32)ARRAY_SIZE(golden_settings_gc_10_3_vangogh));
break;
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(10, 3, 3):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_3,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_3));
+ break;
+ case IP_VERSION(10, 3, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_4,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4));
+ break;
+ case IP_VERSION(10, 3, 5):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_5,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_5));
+ break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_0_cyan_skillfish,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish));
+ break;
+ case IP_VERSION(10, 3, 6):
soc15_program_register_sequence(adev,
- golden_settings_gc_10_3_4,
- (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4));
+ golden_settings_gc_10_3_6,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_6));
+ break;
+ case IP_VERSION(10, 3, 7):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_7,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_7));
break;
default:
break;
@@ -3537,13 +3997,6 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
gfx_v10_0_init_spm_golden_registers(adev);
}
-static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
@@ -3580,29 +4033,22 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
- unsigned i;
+ unsigned int i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
- return r;
- }
-
WREG32(scratch, 0xCAFEDEAD);
-
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@@ -3619,8 +4065,6 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- amdgpu_gfx_scratch_free(adev, scratch);
-
return r;
}
@@ -3629,22 +4073,26 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- unsigned index;
+ unsigned int index;
uint64_t gpu_addr;
- uint32_t tmp;
+ uint32_t *cpu_ptr;
long r;
+ memset(&ib, 0, sizeof(ib));
+
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
- if (r)
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
+ }
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
@@ -3665,13 +4113,12 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err2;
}
- tmp = adev->wb.wb[index];
- if (tmp == 0xDEADBEEF)
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -3680,18 +4127,12 @@ err1:
static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
kfree(adev->gfx.rlc.register_list_format);
}
@@ -3700,10 +4141,12 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
{
adev->gfx.cp_fw_write_wait = false;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
if ((adev->gfx.me_fw_version >= 0x00000046) &&
(adev->gfx.me_feature_version >= 27) &&
(adev->gfx.pfp_fw_version >= 0x00000068) &&
@@ -3712,10 +4155,14 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
(adev->gfx.mec_feature_version >= 27))
adev->gfx.cp_fw_write_wait = true;
break;
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.cp_fw_write_wait = true;
break;
default:
@@ -3726,39 +4173,6 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
DRM_WARN_ONCE("CP firmware version too old, please update!");
}
-
-static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_1 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
- adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
- adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
- adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
- adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
- adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
- adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
- adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
- adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
- adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
- adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
- adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
- adev->gfx.rlc.reg_list_format_direct_reg_list_length =
- le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
-}
-
-static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_2 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
- adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
- adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
- adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
-}
-
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
bool ret = false;
@@ -3773,13 +4187,13 @@ static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
break;
}
- return ret ;
+ return ret;
}
static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_NAVI10:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
break;
@@ -3790,289 +4204,87 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
- char fw_name[40];
- char wks[10];
+ char fw_name[53];
+ char ucode_prefix[30];
+ const char *wks = "";
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
DRM_DEBUG("\n");
- memset(wks, 0, sizeof(wks));
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- chip_name = "navi10";
- break;
- case CHIP_NAVI14:
- chip_name = "navi14";
- if (!(adev->pdev->device == 0x7340 &&
- adev->pdev->revision != 0x00))
- snprintf(wks, sizeof(wks), "_wks");
- break;
- case CHIP_NAVI12:
- chip_name = "navi12";
- break;
- case CHIP_SIENNA_CICHLID:
- chip_name = "sienna_cichlid";
- break;
- case CHIP_NAVY_FLOUNDER:
- chip_name = "navy_flounder";
- break;
- case CHIP_VANGOGH:
- chip_name = "vangogh";
- break;
- case CHIP_DIMGREY_CAVEFISH:
- chip_name = "dimgrey_cavefish";
- break;
- default:
- BUG();
- }
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) &&
+ (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+ wks = "_wks";
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me%s.bin", ucode_prefix, wks);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
if (!amdgpu_sriov_vf(adev)) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+
+ /* don't validate this firmware. There are apparently firmwares
+ * in the wild with incorrect size in the header
+ */
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
- le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
- le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
- le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
- le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
goto out;
- }
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
-
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
-
- if (version_major == 2) {
- if (version_minor >= 1)
- gfx_v10_0_init_rlc_ext_microcode(adev);
- if (version_minor == 2)
- gfx_v10_0_init_rlc_iram_dram_microcode(adev);
- }
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
if (!err) {
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.mec2_fw->data;
- adev->gfx.mec2_fw_version =
- le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec2_feature_version =
- le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
err = 0;
adev->gfx.mec2_fw = NULL;
}
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- if (info->fw) {
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- if (adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
- adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
- adev->gfx.rlc.save_restore_list_srm_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
-
- if (adev->gfx.rlc.rlc_iram_ucode_size_bytes &&
- adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
- }
- }
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- if (adev->gfx.mec2_fw) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
- info->fw = adev->gfx.mec2_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
- info->fw = adev->gfx.mec2_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- }
- }
-
gfx_v10_0_check_fw_write_wait(adev);
out:
if (err) {
- dev_err(adev->dev,
- "gfx10: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
}
gfx_v10_0_check_gfxoff_flag(adev);
@@ -4110,12 +4322,9 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -4123,39 +4332,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
@@ -4171,6 +4356,30 @@ static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.cp_table_ptr);
}
+static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ reg_access_ctrl->spare_int =
+ SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid);
+ break;
+ default:
+ reg_access_ctrl->spare_int =
+ SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
+ break;
+ }
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
{
const struct cs_section_def *cs_data;
@@ -4187,10 +4396,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return r;
}
- /* init spm vmid with 0xf */
- if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
-
return 0;
}
@@ -4200,19 +4405,11 @@ static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}
-static int gfx_v10_0_me_init(struct amdgpu_device *adev)
+static void gfx_v10_0_me_init(struct amdgpu_device *adev)
{
- int r;
-
bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
amdgpu_gfx_graphics_queue_acquire(adev);
-
- r = gfx_v10_0_init_microcode(adev);
- if (r)
- DRM_ERROR("Failed to load gfx firmware!\n");
-
- return r;
}
static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
@@ -4220,13 +4417,13 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
int r;
u32 *hpd;
const __le32 *fw_data = NULL;
- unsigned fw_size;
+ unsigned int fw_size;
u32 *fw = NULL;
size_t mec_hpd_size;
const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -4298,11 +4495,12 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
-static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* in gfx10 the SIMD_ID is specified as part of the INSTANCE
* field when performing a select_se_sh so it should be
- * zero here */
+ * zero here
+ */
WARN_ON(simd != 0);
/* type 2 wave data */
@@ -4322,9 +4520,10 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd,
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -4335,7 +4534,7 @@ static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
dst);
}
-static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
@@ -4346,7 +4545,7 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
nv_grbm_select(adev, me, pipe, q, vm);
}
@@ -4382,12 +4581,10 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
-
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -4395,10 +4592,14 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
break;
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -4408,6 +4609,15 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config_fields.num_pkrs =
1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN;
+ break;
default:
BUG();
break;
@@ -4439,9 +4649,9 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
int me, int pipe, int queue)
{
- int r;
struct amdgpu_ring *ring;
unsigned int irq_type;
+ unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@@ -4456,22 +4666,20 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
else
ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT);
- if (r)
- return r;
- return 0;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int mec, int pipe, int queue)
{
- int r;
- unsigned irq_type;
+ unsigned int irq_type;
struct amdgpu_ring *ring;
unsigned int hw_prio;
@@ -4487,46 +4695,93 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX10_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type, hw_prio);
- if (r)
- return r;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
- return 0;
+static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
}
-static int gfx_v10_0_sw_init(void *handle)
+static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id = 0;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
break;
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -4540,11 +4795,79 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 101 &&
+ adev->gfx.pfp_fw_version >= 158 &&
+ adev->gfx.mec_fw_version >= 151) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 14 &&
+ adev->gfx.pfp_fw_version >= 17 &&
+ adev->gfx.mec_fw_version >= 24) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 9 &&
+ adev->gfx.mec_fw_version >= 12) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
/* KIQ event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
- &adev->gfx.kiq.irq);
+ &adev->gfx.kiq[0].irq);
if (r)
return r;
@@ -4555,6 +4878,13 @@ static int gfx_v10_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -4569,16 +4899,16 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- gfx_v10_0_scratch_init(adev);
+ gfx_v10_0_me_init(adev);
- r = gfx_v10_0_me_init(adev);
- if (r)
- return r;
-
- r = gfx_v10_0_rlc_init(adev);
- if (r) {
- DRM_ERROR("Failed to init rlc BOs!\n");
- return r;
+ if (adev->gfx.rlc.funcs) {
+ if (adev->gfx.rlc.funcs->init) {
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+ }
}
r = gfx_v10_0_mec_init(adev);
@@ -4589,7 +4919,7 @@ static int gfx_v10_0_sw_init(void *handle)
/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;
@@ -4608,8 +4938,8 @@ static int gfx_v10_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
- j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v10_0_compute_ring_init(adev, ring_id,
@@ -4622,18 +4952,27 @@ static int gfx_v10_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
if (r)
return r;
@@ -4648,6 +4987,12 @@ static int gfx_v10_0_sw_init(void *handle)
gfx_v10_0_gpu_early_init(adev);
+ gfx_v10_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -4672,19 +5017,22 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.me.me_fw_ptr);
}
-static int gfx_v10_0_sw_fini(void *handle)
+static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
gfx_v10_0_pfp_fini(adev);
gfx_v10_0_ce_fini(adev);
@@ -4696,12 +5044,17 @@ static int gfx_v10_0_sw_fini(void *handle)
gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
gfx_v10_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
return 0;
}
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance)
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -4756,16 +5109,21 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
bitmap = i * adev->gfx.config.max_sh_per_se + j;
- if ((adev->asic_type == CHIP_SIENNA_CICHLID) &&
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v10_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
@@ -4782,8 +5140,9 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
uint32_t pa_sc_tile_steering_override;
/* for ASICs that integrates GFX v10.3
- * pa_sc_tile_steering_override should be set to 0 */
- if (adev->asic_type >= CHIP_SIENNA_CICHLID)
+ * pa_sc_tile_steering_override should be set to 0
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
return 0;
/* init num_sc */
@@ -4812,6 +5171,29 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
#define DEFAULT_SH_MEM_BASES (0x6000)
+static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@@ -4835,14 +5217,19 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- /* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
}
+
+ gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+ AMDGPU_NUM_VMID);
}
static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
@@ -4891,47 +5278,44 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
4 + /* RMI */
1); /* SQG */
- if (adev->asic_type == CHIP_NAVI10 ||
- adev->asic_type == CHIP_NAVI14 ||
- adev->asic_type == CHIP_NAVI12) {
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
- for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
- wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
- /*
- * Set corresponding TCP bits for the inactive WGPs in
- * GCRD_SA_TARGETS_DISABLE
- */
- gcrd_targets_disable_tcp = 0;
- /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
- utcl_invreq_disable = 0;
-
- for (k = 0; k < max_wgp_per_sh; k++) {
- if (!(wgp_active_bitmap & (1 << k))) {
- gcrd_targets_disable_tcp |= 3 << (2 * k);
- utcl_invreq_disable |= (3 << (2 * k)) |
- (3 << (2 * (max_wgp_per_sh + k)));
- }
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+ /*
+ * Set corresponding TCP bits for the inactive WGPs in
+ * GCRD_SA_TARGETS_DISABLE
+ */
+ gcrd_targets_disable_tcp = 0;
+ /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
+ utcl_invreq_disable = 0;
+
+ for (k = 0; k < max_wgp_per_sh; k++) {
+ if (!(wgp_active_bitmap & (1 << k))) {
+ gcrd_targets_disable_tcp |= 3 << (2 * k);
+ gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2));
+ utcl_invreq_disable |= (3 << (2 * k)) |
+ (3 << (2 * (max_wgp_per_sh + k)));
}
-
- tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
- /* only override TCP & SQC bits */
- tmp &= 0xffffffff << (4 * max_wgp_per_sh);
- tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
- WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
-
- tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
- /* only override TCP bits */
- tmp &= 0xffffffff << (2 * max_wgp_per_sh);
- tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
- WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
}
- }
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- mutex_unlock(&adev->grbm_idx_mutex);
+ tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= (0xffffffffU << (4 * max_wgp_per_sh));
+ tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
+ WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= (0xffffffffU << (3 * max_wgp_per_sh));
+ tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
+ WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
+ }
}
+
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
}
static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
@@ -4939,7 +5323,7 @@ static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
/* TCCs are global (not instanced). */
uint32_t tcc_disable;
- if (adev->asic_type >= CHIP_SIENNA_CICHLID) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) {
tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_gc_10_3) |
RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_gc_10_3);
} else {
@@ -4957,7 +5341,8 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
gfx_v10_0_setup_rb(adev);
gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
@@ -4968,7 +5353,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
nv_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
@@ -4989,26 +5374,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
}
+static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp;
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
if (amdgpu_sriov_vf(adev))
return;
- tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
-
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
- enable ? 1 : 0);
-
- WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
}
static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
@@ -5016,7 +5449,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
- if (adev->asic_type == CHIP_NAVI12) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
@@ -5072,8 +5505,10 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
{
- /* TODO: enable rlc & smu handshake until smu
- * and gfxoff feature works as expected */
+ /*
+ * TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected
+ */
if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
gfx_v10_0_rlc_smu_handshake_cntl(adev, false);
@@ -5086,17 +5521,17 @@ static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
uint32_t tmp;
/* enable Save Restore Machine */
- tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
+ tmp = RREG32_SOC15(GC, 0, mmRLC_SRM_CNTL);
tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
+ WREG32_SOC15(GC, 0, mmRLC_SRM_CNTL, tmp);
}
static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_0 *hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
if (!adev->gfx.rlc_fw)
return -EINVAL;
@@ -5124,7 +5559,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
{
int r;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->psp.autoload_supported) {
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
@@ -5132,6 +5568,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
gfx_v10_0_init_csb(adev);
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
gfx_v10_0_rlc_enable_srm(adev);
} else {
@@ -5162,6 +5600,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
gfx_v10_0_init_csb(adev);
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
adev->gfx.rlc.funcs->start(adev);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
@@ -5170,6 +5610,7 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
}
+
return 0;
}
@@ -5184,7 +5625,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
int ret;
RLC_TABLE_OF_CONTENT *rlc_toc;
- ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE,
+ ret = amdgpu_bo_create_reserved(adev, adev->psp.toc.size_bytes, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.rlc_toc_bo,
&adev->gfx.rlc.rlc_toc_gpu_addr,
@@ -5195,7 +5636,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
}
/* Copy toc from psp sos fw to rlc toc buffer */
- memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size);
+ memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc.start_addr, adev->psp.toc.size_bytes);
rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf;
while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) &&
@@ -5637,11 +6078,13 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
- if (adev->asic_type == CHIP_NAVI12) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
- } else {
+ else
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- }
+
+ if (amdgpu_in_reset(adev) && !enable)
+ return 0;
for (i = 0; i < adev->usec_timeout; i++) {
if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
@@ -5660,7 +6103,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *pfp_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -5709,7 +6152,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -5738,7 +6181,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *ce_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -5787,7 +6230,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -5815,7 +6258,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *me_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -5864,7 +6307,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -6026,11 +6469,15 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
}
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
}
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
@@ -6055,7 +6502,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
u32 tmp;
u32 rb_bufsz;
u64 rb_addr, rptr_addr, wptr_gpu_addr;
- u32 i;
/* Set the write pointer delay */
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
@@ -6082,13 +6528,13 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@@ -6120,12 +6566,12 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* Set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@@ -6150,22 +6596,21 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v10_0_cp_gfx_start(adev);
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-
return 0;
}
static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
if (enable) {
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
break;
default:
@@ -6173,11 +6618,15 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
break;
}
} else {
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
@@ -6188,7 +6637,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
break;
}
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -6197,7 +6646,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
const struct gfx_firmware_header_v1_0 *mec_hdr;
const __le32 *fw_data;
- unsigned i;
+ unsigned int i;
u32 tmp;
u32 usec_timeout = 50000; /* Wait for 50 ms */
@@ -6233,7 +6682,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -6269,33 +6718,51 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* tell RLC which is KIQ queue */
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
break;
}
}
-static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
+static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v10_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
{
- struct amdgpu_device *adev = ring->adev;
- struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ bool priority = 0;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
+static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_gfx_mqd *mqd = m;
uint64_t hqd_gpu_addr, wb_gpu_addr;
uint32_t tmp;
uint32_t rb_bufsz;
@@ -6305,8 +6772,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
mqd->cp_gfx_hqd_wptr_hi = 0;
/* set the pointer to the MQD */
- mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set up mqd control */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
@@ -6320,11 +6787,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
- /* set up default queue priority level
- * 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
- tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
- mqd->cp_gfx_hqd_queue_priority = tmp;
+ /* set up gfx queue priority */
+ gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
@@ -6332,23 +6796,23 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
mqd->cp_gfx_hqd_quantum = tmp;
/* set up gfx hqd base. this is similar as CP_RB_BASE */
- hqd_gpu_addr = ring->gpu_addr >> 8;
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
mqd->cp_gfx_hqd_base = hqd_gpu_addr;
mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = prop->rptr_gpu_addr;
mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
mqd->cp_gfx_hqd_rptr_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up rb_wptr_poll addr */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = prop->wptr_gpu_addr;
mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
- rb_bufsz = order_base_2(ring->ring_size / 4) - 1;
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
@@ -6359,9 +6823,9 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
/* set up cp_doorbell_control */
tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
- if (ring->use_doorbell) {
+ if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
+ DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_EN, 1);
} else
@@ -6369,13 +6833,7 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
DOORBELL_EN, 0);
mqd->cp_rb_doorbell_control = tmp;
- /*if there are 2 gfx rings, set the lower doorbell range of the first ring,
- *otherwise the range of the second ring will override the first ring */
- if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
- gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
-
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
/* active the queue */
@@ -6384,175 +6842,71 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
return 0;
}
-#ifdef BRING_UP_DEBUG
-static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v10_gfx_mqd *mqd = ring->mqd_ptr;
-
- /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
-
- /* set GFX_MQD_BASE */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
- /* set GFX_MQD_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
-
- /* set GFX_HQD_VMID to 0 */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
-
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY,
- mqd->cp_gfx_hqd_queue_priority);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
-
- /* set GFX_HQD_BASE, similar as CP_RB_BASE */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
-
- /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
-
- /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
-
- /* set RB_WPTR_POLL_ADDR */
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
-
- /* set RB_DOORBELL_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
-
- /* active the queue */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
-
- return 0;
-}
-#endif
-
-static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_gfx_mqd_init(ring);
-#ifdef BRING_UP_DEBUG
- gfx_v10_0_gfx_queue_init_register(ring);
-#endif
+ amdgpu_ring_init_mqd(ring);
+
+ /*
+ * if there are 2 gfx rings, set the lower doorbell
+ * range of the first ring, otherwise the range of
+ * the second ring will override the first ring
+ */
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) {
- /* reset mqd with the backup copy */
- if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
- /* reset the ring */
- ring->wptr = 0;
- adev->wb.wb[ring->wptr_offs] = 0;
- amdgpu_ring_clear_ring(ring);
-#ifdef BRING_UP_DEBUG
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_gfx_queue_init_register(ring);
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
-#endif
- } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
amdgpu_ring_clear_ring(ring);
}
return 0;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- int r, i;
-
- if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
- return -EINVAL;
-
- r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
- adev->gfx.num_gfx_rings);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-#endif
-
static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_gfx_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
- goto done;
- }
-#ifndef BRING_UP_DEBUG
- r = gfx_v10_0_kiq_enable_kgq(adev);
- if (r)
- goto done;
-#endif
- r = gfx_v10_0_cp_gfx_start(adev);
- if (r)
- goto done;
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
+ return r;
}
-done:
- return r;
-}
-static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct v10_compute_mqd *mqd)
-{
- struct amdgpu_device *adev = ring->adev;
+ r = amdgpu_gfx_enable_kgq(adev, 0);
+ if (r)
+ return r;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
- mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
- mqd->cp_hqd_queue_priority =
- AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
- }
- }
+ return gfx_v10_0_cp_gfx_start(adev);
}
-static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
+static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
{
- struct amdgpu_device *adev = ring->adev;
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ struct v10_compute_mqd *mqd = m;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
@@ -6564,7 +6918,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
- eop_base_addr = ring->eop_gpu_addr >> 8;
+ eop_base_addr = prop->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -6578,9 +6932,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
/* enable doorbell? */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- if (ring->use_doorbell) {
+ if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
+ DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -6595,15 +6949,14 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_doorbell_control = tmp;
/* disable the queue if it's active */
- ring->wptr = 0;
mqd->cp_hqd_dequeue_request = 0;
mqd->cp_hqd_pq_rptr = 0;
mqd->cp_hqd_pq_wptr_lo = 0;
mqd->cp_hqd_pq_wptr_hi = 0;
/* set the pointer to the MQD */
- mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set MQD vmid to 0 */
tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
@@ -6611,55 +6964,38 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_mqd_control = tmp;
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- hqd_gpu_addr = ring->gpu_addr >> 8;
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
- (order_base_2(ring->ring_size / 4) - 1));
+ (order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = prop->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = prop->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- tmp = 0;
- /* enable the doorbell if requested */
- if (ring->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
-
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
/* set the vmid for the queue */
@@ -6675,13 +7011,10 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_ib_control = tmp;
/* set static priority for a compute queue/ring */
- gfx_v10_0_compute_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
- /* map_queues packet doesn't need activate the queue,
- * so only kiq need set this field.
- */
- if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
- mqd->cp_hqd_active = 1;
+ mqd->cp_hqd_active = prop->hqd_active;
return 0;
}
@@ -6699,20 +7032,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
- /* write the EOP addr */
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
- mqd->cp_hqd_eop_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
- mqd->cp_hqd_eop_base_addr_hi);
-
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
- mqd->cp_hqd_eop_control);
-
- /* enable doorbell? */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
- mqd->cp_hqd_pq_doorbell_control);
-
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
@@ -6731,6 +7050,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_wptr_hi);
}
+ /* disable doorbells */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
/* set the pointer to the MQD */
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
mqd->cp_mqd_base_addr_lo);
@@ -6800,14 +7132,13 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v10_0_kiq_setting(ring);
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -6820,46 +7151,45 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
} else {
memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_compute_mqd_init(ring);
+ amdgpu_ring_init_mqd(ring);
gfx_v10_0_kiq_init_register(ring);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
}
-static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!restore && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_compute_mqd_init(ring);
+ amdgpu_ring_init_mqd(ring);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
-
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
- amdgpu_ring_clear_ring(ring);
- } else {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
@@ -6868,54 +7198,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0))
- return r;
-
- gfx_v10_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v10_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i],
+ false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
@@ -6982,25 +7282,29 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
{
uint32_t data, pattern = 0xDEADBEEF;
- /* check if mmVGT_ESGS_RING_SIZE_UMD
- * has been remapped to mmVGT_ESGS_RING_SIZE */
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
+ /*
+ * check if mmVGT_ESGS_RING_SIZE_UMD
+ * has been remapped to mmVGT_ESGS_RING_SIZE
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD , data);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
- } else {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
- return false;
}
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
break;
- case CHIP_VANGOGH:
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
return true;
default:
data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
@@ -7010,27 +7314,36 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
- } else {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
- return false;
}
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
break;
}
+
+ return false;
}
static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
{
uint32_t data;
- /* initialize cam_index to 0
- * index will auto-inc after each data writting */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * Initialize cam_index to 0
+ * index will auto-inc after each data writing
+ */
WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
@@ -7149,6 +7462,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
{
uint32_t data;
+
data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG);
data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data);
@@ -7158,31 +7472,26 @@ static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data);
}
-static int gfx_v10_0_hw_init(void *handle)
+static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
/**
* For gfx 10, rlc firmware loading relies on smu firmware is
* loaded firstly, so in direct type, it has to load smc ucode
* here before rlc.
*/
- if (adev->smu.ppt_funcs != NULL && !(adev->flags & AMD_IS_APU)) {
- r = smu_load_microcode(&adev->smu);
- if (r)
- return r;
-
- r = smu_check_fw_status(&adev->smu);
- if (r) {
- pr_err("SMC firmware status is not correct\n");
- return r;
- }
- }
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
gfx_v10_0_disable_gpa_mode(adev);
}
@@ -7200,71 +7509,56 @@ static int gfx_v10_0_hw_init(void *handle)
* init golden registers and rlc resume may override some registers,
* reconfig them here
*/
- gfx_v10_0_tcp_harvest(adev);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 10) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
+ gfx_v10_0_tcp_harvest(adev);
r = gfx_v10_0_cp_resume(adev);
if (r)
return r;
- if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
gfx_v10_3_program_pbb_mode(adev);
- if (adev->asic_type >= CHIP_SIENNA_CICHLID)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev))
gfx_v10_3_set_power_brake_sequence(adev);
return r;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
+static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i;
-
- if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
- return -EINVAL;
-
- if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
- adev->gfx.num_gfx_rings))
- return -ENOMEM;
+ struct amdgpu_device *adev = ip_block->adev;
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
- PREEMPT_QUEUES, 0, 0);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-#endif
-
-static int gfx_v10_0_hw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
- uint32_t tmp;
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
- if (!adev->in_pci_err_recovery) {
-#ifndef BRING_UP_DEBUG
+ /* WA added for Vangogh asic fixing the SMU suspend failure
+ * It needs to set power gating again during gfxoff control
+ * otherwise the gfxoff disallowing will be failed to set.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
+
+ if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
- r = gfx_v10_0_kiq_disable_kgq(adev);
- if (r)
+ if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
-#endif
- if (amdgpu_gfx_disable_kcq(adev))
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
DRM_ERROR("KCQ disable failed\n");
}
if (amdgpu_sriov_vf(adev)) {
gfx_v10_0_cp_gfx_enable(adev, false);
- /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
- tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
- tmp &= 0xffffff00;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
-
+ /* Remove the steps of clearing KIQ position.
+ * It causes GFX hang when another Win guest is rendering.
+ */
return 0;
}
gfx_v10_0_cp_enable(adev, false);
@@ -7273,19 +7567,19 @@ static int gfx_v10_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v10_0_suspend(void *handle)
+static int gfx_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_fini(handle);
+ return gfx_v10_0_hw_fini(ip_block);
}
-static int gfx_v10_0_resume(void *handle)
+static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_init(handle);
+ return gfx_v10_0_hw_init(ip_block);
}
-static bool gfx_v10_0_is_idle(void *handle)
+static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -7294,11 +7588,11 @@ static bool gfx_v10_0_is_idle(void *handle)
return true;
}
-static int gfx_v10_0_wait_for_idle(void *handle)
+static int gfx_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
+ unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -7312,11 +7606,11 @@ static int gfx_v10_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v10_0_soft_reset(void *handle)
+static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -7341,11 +7635,14 @@ static int gfx_v10_0_soft_reset(void *handle)
/* GRBM_STATUS2 */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET,
@@ -7371,19 +7668,17 @@ static int gfx_v10_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v10_0_cp_compute_enable(adev, false);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@@ -7393,22 +7688,73 @@ static int gfx_v10_0_soft_reset(void *handle)
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
- uint64_t clock;
-
- amdgpu_gfx_off_ctrl(adev, false);
- mutex_lock(&adev->gfx.gpu_clock_mutex);
- switch (adev->asic_type) {
- case CHIP_VANGOGH:
- clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) |
- ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
+ uint64_t clock, clock_lo, clock_hi, hi_check;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ case IP_VERSION(10, 3, 6):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
break;
default:
- clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) |
- ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL);
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
break;
}
- mutex_unlock(&adev->gfx.gpu_clock_mutex);
- amdgpu_gfx_off_ctrl(adev, true);
return clock;
}
@@ -7441,20 +7787,28 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v10_0_early_init(void *handle)
+static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
+ adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
break;
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;
break;
default:
@@ -7469,13 +7823,17 @@ static int gfx_v10_0_early_init(void *handle)
gfx_v10_0_set_irq_funcs(adev);
gfx_v10_0_set_gds_init(adev);
gfx_v10_0_set_rlc_funcs(adev);
+ gfx_v10_0_set_mqd_funcs(adev);
- return 0;
+ /* init rlcg reg access ctrl */
+ gfx_v10_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v10_0_init_microcode(adev);
}
-static int gfx_v10_0_late_init(void *handle)
+static int gfx_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -7486,6 +7844,10 @@ static int gfx_v10_0_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
return 0;
}
@@ -7498,19 +7860,23 @@ static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
}
-static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
- unsigned i;
+ unsigned int i;
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
/* wait for RLC_SAFE_MODE */
@@ -7535,16 +7901,20 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
data = RLC_SAFE_MODE__CMD_MASK;
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
break;
default:
@@ -7558,6 +7928,9 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t data, def;
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 0 - Disable some blocks' MGCG */
@@ -7572,6 +7945,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
if (def != data)
@@ -7594,13 +7968,15 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
}
}
- } else {
+ } else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
- RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
@@ -7626,22 +8002,34 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
{
uint32_t data, def;
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
/* Enable 3D CGCG/CGLS */
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ if (enable) {
/* write cmd to clear cgcg/cgls ov */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
/* unset CGCG override */
- data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
/* update CGCG and CGLS override bits */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
/* enable 3Dcgcg FSM(0x0000363f) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
- data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
- RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
@@ -7654,9 +8042,14 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
} else {
/* Disable CGCG/CGLS */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+
/* disable cgcg, cgls should be disabled */
- data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
- RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
/* disable cgcg and cgls in FSM */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
@@ -7668,25 +8061,35 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)))
+ return;
+
+ if (enable) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
/* unset CGCG override */
- data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
- else
- data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+
/* update CGCG and CGLS override bits */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
/* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
- data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
- RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
@@ -7698,8 +8101,14 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
} else {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+
/* reset CGCG/CGLS bits */
- data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
/* disable cgcg and cgls in FSM */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
@@ -7711,7 +8120,10 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) {
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ if (enable) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset FGCG override */
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
@@ -7742,10 +8154,101 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
}
}
+static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+ uint32_t reg_idx = 0;
+ uint32_t i;
+
+ const uint32_t tcp_ctrl_regs[] = {
+ mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG
+ };
+
+ const uint32_t tcp_ctrl_regs_nv12[] = {
+ mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+ };
+
+ const uint32_t sm_ctlr_regs[] = {
+ mmCGTS_SA0_QUAD0_SM_CTRL_REG,
+ mmCGTS_SA0_QUAD1_SM_CTRL_REG,
+ mmCGTS_SA1_QUAD0_SM_CTRL_REG,
+ mmCGTS_SA1_QUAD1_SM_CTRL_REG
+ };
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
+ for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+ tcp_ctrl_regs_nv12[i];
+ reg_data = RREG32(reg_idx);
+ reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+ WREG32(reg_idx, reg_data);
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+ tcp_ctrl_regs[i];
+ reg_data = RREG32(reg_idx);
+ reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+ WREG32(reg_idx, reg_data);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] +
+ sm_ctlr_regs[i];
+ reg_data = RREG32(reg_idx);
+ reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK;
+ reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT;
+ WREG32(reg_idx, reg_data);
+ }
+}
+
static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* enable FGCG firstly*/
@@ -7758,6 +8261,14 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
gfx_v10_0_update_3d_clock_gating(adev, enable);
/* === CGCG + CGLS === */
gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 10)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 1)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 2)))
+ gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
} else {
/* CGCG/CGLS should be disabled before MGCG/MGLS
* === CGCG + CGLS ===
@@ -7779,28 +8290,41 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
AMD_CG_SUPPORT_GFX_3D_CGLS))
gfx_v10_0_enable_gui_idle_interrupt(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid)
{
- u32 reg, data;
+ u32 reg, pre_data, data;
reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
- if (amdgpu_sriov_is_pp_one_vf(adev))
- data = RREG32_NO_KIQ(reg);
+ /* not for *_SOC15 */
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
else
- data = RREG32(reg);
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
- else
- WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ }
+}
+
+static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
+{
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, vmid);
+
+ amdgpu_gfx_off_ctrl(adev, true);
}
static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
@@ -7847,22 +8371,32 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
* in refclk count. Note that RLC FW is modified to take 16 bits from
* RLC_PG_DELAY_3[15:0] as the hysteresis instead of just 8 bits.
*
- * The recommendation from RLC team is setting RLC_PG_DELAY_3 to 200us(0x4E20)
- * as part of CGPG enablement starting point.
+ * The recommendation from RLC team is setting RLC_PG_DELAY_3 to 200us as part)
+ * of CGPG enablement starting point.
+ * Power/performance team will optimize it and might give a new value later.
*/
- if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && adev->asic_type == CHIP_VANGOGH) {
- data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
- WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
+ WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
+ break;
+ default:
+ break;
+ }
}
}
static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
gfx_v10_cntl_power_gating(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
@@ -7891,31 +8425,40 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.reset = gfx_v10_0_rlc_reset,
.start = gfx_v10_0_rlc_start,
.update_spm_vmid = gfx_v10_0_update_spm_vmid,
- .rlcg_wreg = gfx_v10_rlcg_wreg,
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
amdgpu_gfx_off_ctrl(adev, enable);
break;
- case CHIP_VANGOGH:
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
gfx_v10_cntl_pg(adev, enable);
- amdgpu_gfx_off_ctrl(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
break;
default:
break;
@@ -7923,22 +8466,26 @@ static int gfx_v10_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v10_0_set_clockgating_state(void *handle,
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
gfx_v10_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -7948,9 +8495,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_FGCG */
@@ -7994,7 +8541,8 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/
+ /* gfx10 is 32bit rptr*/
+ return *(uint32_t *)ring->rptr_cpu_addr;
}
static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -8004,7 +8552,7 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
} else {
wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
@@ -8019,17 +8567,21 @@ static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */
+ /* gfx10 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
}
static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
@@ -8038,7 +8590,7 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell)
- wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
else
BUG();
return wptr;
@@ -8048,9 +8600,9 @@ static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- /* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
BUG(); /* only DOORBELL method supported on gfx10 now */
@@ -8076,7 +8628,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@@ -8091,7 +8643,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
uint32_t flags)
{
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
if (ib->flags & AMDGPU_IB_FLAG_CE)
@@ -8101,7 +8653,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
if (flags & AMDGPU_IB_PREEMPTED)
@@ -8128,7 +8680,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
uint32_t flags)
{
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
/* Currently, there is a high possibility to get wave ID mismatch
@@ -8159,7 +8711,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
}
static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
- u64 seq, unsigned flags)
+ u64 seq, unsigned int flags)
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
@@ -8201,8 +8753,20 @@ static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
upper_32_bits(addr), seq, 0xffffffff, 4);
}
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -8252,7 +8816,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
{
uint32_t dw2 = 0;
- if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev))
+ if (ring->adev->gfx.mcbp)
gfx_v10_0_ring_emit_ce_meta(ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
@@ -8281,38 +8845,28 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
- unsigned ret;
+ unsigned int ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
@@ -8359,26 +8913,26 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v10_ce_ib_state ce_payload = {0};
- uint64_t csa_addr;
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+ offset = offsetof(struct v10_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, ce_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, ce_payload)));
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
if (resume)
- amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
- offsetof(struct v10_gfx_meta_data,
- ce_payload),
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
sizeof(ce_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
@@ -8389,12 +8943,18 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v10_de_ib_state de_payload = {0};
- uint64_t csa_addr, gds_addr;
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
int cnt;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
- gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
PAGE_SIZE);
+
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -8404,15 +8964,11 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, de_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, de_payload)));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
if (resume)
- amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
- offsetof(struct v10_gfx_meta_data,
- de_payload),
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
sizeof(de_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&de_payload,
@@ -8492,19 +9048,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
-static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- WREG32_SOC15(GC, 0, mmSQ_CMD, value);
-}
-
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -8531,16 +9074,16 @@ gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- cp_int_cntl = RREG32(cp_int_cntl_reg);
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
TIME_STAMP_INT_ENABLE, 0);
- WREG32(cp_int_cntl_reg, cp_int_cntl);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- cp_int_cntl = RREG32(cp_int_cntl_reg);
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
TIME_STAMP_INT_ENABLE, 1);
- WREG32(cp_int_cntl_reg, cp_int_cntl);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
break;
default:
break;
@@ -8584,16 +9127,16 @@ static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
TIME_STAMP_INT_ENABLE, 0);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
TIME_STAMP_INT_ENABLE, 1);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
break;
default:
break;
@@ -8602,7 +9145,7 @@ static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev
static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
switch (type) {
@@ -8651,6 +9194,7 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
DRM_DEBUG("IH: CP EOP\n");
+
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
@@ -8667,27 +9211,58 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
/* Per-queue interrupt is supported for MEC starting from VI.
- * The interrupt can only be enabled/disabled per pipe instead of per queue.
- */
- if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
amdgpu_fence_process(ring);
}
break;
}
+
return 0;
}
static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -8696,17 +9271,75 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -8730,8 +9363,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
case 0:
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
- /* we only enabled 1 gfx queue per pipe for now */
- if (ring->me == me_id && ring->pipe == pipe_id)
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
@@ -8758,6 +9391,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -8773,7 +9415,7 @@ static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
uint32_t tmp, target;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
if (ring->me == 1)
target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
@@ -8789,20 +9431,20 @@ static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
GENERIC2_INT_ENABLE, 0);
WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
- tmp = RREG32(target);
+ tmp = RREG32_SOC15_IP(GC, target);
tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
GENERIC2_INT_ENABLE, 0);
- WREG32(target, tmp);
+ WREG32_SOC15_IP(GC, target, tmp);
} else {
tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
GENERIC2_INT_ENABLE, 1);
WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
- tmp = RREG32(target);
+ tmp = RREG32_SOC15_IP(GC, target);
tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
GENERIC2_INT_ENABLE, 1);
- WREG32(target, tmp);
+ WREG32_SOC15_IP(GC, target, tmp);
}
break;
default:
@@ -8817,7 +9459,7 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -8852,6 +9494,319 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ u32 tmp;
+ u64 addr;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ addr = amdgpu_bo_gpu_offset(ring->mqd_obj) +
+ offsetof(struct v10_gfx_mqd, cp_gfx_hqd_active);
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (ring->pipe == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << ring->queue);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << ring->queue);
+
+ gfx_v10_0_ring_emit_wreg(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
+ gfx_v10_0_wait_reg_mem(kiq_ring, 0, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ 0, 1, 0x20);
+ gfx_v10_0_ring_emit_reg_wait(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_kgq_init_queue(ring, true);
+ if (r) {
+ DRM_ERROR("fail to init kgq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ int i, r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ /* make sure dequeue is complete*/
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ if (r) {
+ dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+ return r;
+ }
+
+ r = gfx_v10_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_10_1[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ nv_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -8868,6 +9823,8 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.set_clockgating_state = gfx_v10_0_set_clockgating_state,
.set_powergating_state = gfx_v10_0_set_powergating_state,
.get_clockgating_state = gfx_v10_0_get_clockgating_state,
+ .dump_ip_state = gfx_v10_ip_dump,
+ .print_ip_state = gfx_v10_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
@@ -8875,7 +9832,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = gfx_v10_0_ring_get_rptr_gfx,
.get_wptr = gfx_v10_0_ring_get_wptr_gfx,
.set_wptr = gfx_v10_0_ring_set_wptr_gfx,
@@ -8884,7 +9841,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
@@ -8900,7 +9857,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -8910,19 +9868,21 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v10_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -8930,7 +9890,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
@@ -8943,7 +9902,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -8953,12 +9913,16 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -8966,7 +9930,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
@@ -8977,7 +9940,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
@@ -8990,13 +9952,14 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v10_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;
@@ -9015,6 +9978,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
.process = gfx_v10_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = {
+ .set = gfx_v10_0_set_bad_op_fault_state,
+ .process = gfx_v10_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
.set = gfx_v10_0_set_priv_inst_fault_state,
.process = gfx_v10_0_priv_inst_irq,
@@ -9030,28 +9998,37 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;
- adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
- adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;
+ adev->gfx.kiq[0].irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
+ adev->gfx.kiq[0].irq.funcs = &gfx_v10_0_kiq_irq_funcs;
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
}
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
break;
- case CHIP_NAVI12:
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs_sriov;
break;
default:
@@ -9061,7 +10038,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
{
- unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+ unsigned int total_cu = adev->gfx.config.max_cu_per_sh *
adev->gfx.config.max_sh_per_se *
adev->gfx.config.max_shader_engines;
@@ -9071,6 +10048,20 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
adev->gds.oa_size = 16;
}
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v10_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v10_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v10_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v10_0_compute_mqd_init;
+}
+
static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
u32 bitmap)
{
@@ -9087,17 +10078,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
- u32 data, wgp_bitmask;
- data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
- data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+ u32 disabled_mask =
+ ~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+ u32 efuse_setting = 0;
+ u32 vbios_setting = 0;
+
+ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+ efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
- data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
- data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+ vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
- wgp_bitmask =
- amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+ disabled_mask |= efuse_setting | vbios_setting;
- return (~data) & wgp_bitmask;
+ return (~disabled_mask);
}
static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
@@ -9123,7 +10119,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
{
int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
- unsigned disable_masks[4 * 2];
+ unsigned int disable_masks[4 * 2];
if (!adev || !cu_info)
return -EINVAL;
@@ -9134,18 +10130,25 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
bitmap = i * adev->gfx.config.max_sh_per_se + j;
- if ((adev->asic_type == CHIP_SIENNA_CICHLID) &&
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 7))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
@@ -9161,7 +10164,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
@@ -9237,8 +10240,7 @@ static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev)
(0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT));
}
-const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 10,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
new file mode 100644
index 000000000000..f67569ccf9f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_10_1_10 */
+static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbf068100, 0xbf840023,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020102,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803000,
+ 0xbe813000, 0xbe823000,
+ 0xbe833000, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+/* Define the cleaner shader gfx_10_3_0 */
+static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020002,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803080,
+ 0xbe813080, 0xbe823080,
+ 0xbe833080, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
new file mode 100644
index 000000000000..54f7ed9e2801
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 256 Dwords cleaner shader.
+
+// GFX10.1 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10.1)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_0
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s0 == 1)
+
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, s0
+ s_movreld_b32 s1, s0
+ s_movreld_b32 s2, s0
+ s_movreld_b32 s3, s0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b64 vcc, 0 //clear vcc
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
new file mode 100644
index 000000000000..0e1c246166c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for complitaion change this to main and lower shader main to main_1
+
+// GFX10.3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
new file mode 100644
index 000000000000..8a2ee2de390f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -0,0 +1,7538 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "imu_v11_0.h"
+#include "soc21.h"
+#include "nvd.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "smuio/smuio_13_0_6_offset.h"
+#include "smuio/smuio_13_0_6_sh_mask.h"
+#include "navi10_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+
+#include "soc15.h"
+#include "clearstate_gfx11.h"
+#include "v11_structs.h"
+#include "gfx_v11_0.h"
+#include "gfx_v11_0_cleaner_shader.h"
+#include "gfx_v11_0_3.h"
+#include "nbio_v4_3.h"
+#include "mes_v11_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define GFX11_NUM_GFX_RINGS 1
+#define GFX11_MEC_HPD_SIZE 2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388
+
+#define regCGTT_WD_CLK_CTRL 0x5086
+#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
+#define regPC_CONFIG_CNTL_1 0x194d
+#define regPC_CONFIG_CNTL_1_BASE_IDX 1
+
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
+};
+
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+
+static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t me = 0, eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ me = 1;
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ me = 0;
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ me = 2;
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((me)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx11_kiq_set_resources,
+ .kiq_map_queues = gfx11_kiq_map_queues,
+ .kiq_unmap_queues = gfx11_kiq_unmap_queues,
+ .kiq_query_status = gfx11_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
+}
+
+static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_11_0_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
+ break;
+ default:
+ break;
+ }
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_11_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+
+}
+
+static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+ gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+ } else {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ }
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t *cpu_ptr;
+ long r;
+
+ /* MES KIQ fw hasn't indirect buffer support for now */
+ if (adev->enable_mes_kiq &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ return 0;
+
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+err2:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+ const struct psp_firmware_header_v1_0 *toc_hdr;
+ int err = 0;
+
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", ucode_prefix);
+ if (err)
+ goto out;
+
+ toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+ adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+ adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+ adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+ le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.toc_fw);
+ return err;
+}
+
+static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 1505) &&
+ (adev->gfx.pfp_fw_version >= 1600) &&
+ (adev->gfx.mec_fw_version >= 512)) {
+ if (amdgpu_sriov_vf(adev))
+ adev->gfx.cp_gfx_shadow = true;
+ else
+ adev->gfx.cp_gfx_shadow = false;
+ }
+ break;
+ default:
+ adev->gfx.cp_gfx_shadow = false;
+ break;
+ }
+}
+
+static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
+{
+ char ucode_prefix[25];
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", ucode_prefix);
+ if (err)
+ goto out;
+ /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
+ adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
+ (union amdgpu_firmware_header *)
+ adev->gfx.pfp_fw->data, 2, 0);
+ if (adev->gfx.rs64_enable) {
+ dev_info(adev->dev, "CP RS64 enable\n");
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", ucode_prefix);
+ if (err)
+ goto out;
+ if (adev->gfx.rs64_enable) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
+ adev->pdev->revision == 0xCE)
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/gc_11_0_0_rlc_1.bin");
+ else if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", ucode_prefix);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
+ goto out;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", ucode_prefix);
+ if (err)
+ goto out;
+ if (adev->gfx.rs64_enable) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
+
+ /* only one MEC for gfx 11.0.0. */
+ adev->gfx.mec2_fw = NULL;
+
+ gfx_v11_0_check_fw_cp_gfx_shadow(adev);
+
+ if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
+ err = adev->gfx.imu.funcs->init_microcode(adev);
+ if (err)
+ DRM_ERROR("Failed to init imu firmware!\n");
+ return err;
+ }
+
+out:
+ if (err) {
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ }
+
+ return err;
+}
+
+static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+
+ /* set PA_SC_TILE_STEERING_OVERRIDE */
+ count += 3;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+{
+ u32 count = 0;
+ int ctx_reg_offset;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
+
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ buffer[count++] = cpu_to_le32(ctx_reg_offset);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_gfx_csb_preamble_end(buffer, count);
+}
+
+static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx11_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+ return 0;
+}
+
+static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+static void gfx_v11_0_me_init(struct amdgpu_device *adev)
+{
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ size_t mec_hpd_size;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
+
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ return 0;
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here */
+ WARN_ON(simd != 0);
+
+ /* type 3 wave data */
+ dst[(*no_fields)++] = 3;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ WARN_ON(simd != 0);
+
+ wave_read_regs(
+ adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+ dst);
+}
+
+static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc21_grbm_select(adev, me, pipe, q, vm);
+}
+
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+ return 0;
+ } else {
+ memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+ return -ENOTSUPP;
+ }
+}
+
+static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v11_0_select_se_sh,
+ .read_wave_data = &gfx_v11_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
+ .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
+};
+
+static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ break;
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.ras = &gfx_v11_0_3_ras;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+ int me, int pipe, int queue)
+{
+ struct amdgpu_ring *ring;
+ unsigned int irq_type;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.gfx_ring[ring_id];
+
+ ring->me = me;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ if (adev->gfx.disable_kq) {
+ ring->no_scheduler = true;
+ ring->no_user_submission = true;
+ }
+
+ if (!ring_id)
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX11_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static struct {
+ SOC21_FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
+
+static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+ RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
+
+ while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
+ (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
+ rlc_autoload_info[ucode->id].id = ucode->id;
+ rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
+ rlc_autoload_info[ucode->id].size = ucode->size * 4;
+
+ ucode++;
+ }
+}
+
+static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+ uint32_t total_size = 0;
+ SOC21_FIRMWARE_ID id;
+
+ gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+ for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
+ total_size += rlc_autoload_info[id].size;
+
+ /* In case the offset in rlc toc ucode is aligned */
+ if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
+ total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
+
+ return total_size;
+}
+
+static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t total_size;
+
+ total_size = gfx_v11_0_calc_toc_total_size(adev);
+
+ r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ SOC21_FIRMWARE_ID id,
+ const void *fw_data,
+ uint32_t fw_size,
+ uint32_t *fw_autoload_mask)
+{
+ uint32_t toc_offset;
+ uint32_t toc_fw_size;
+ char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+
+ if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
+ return;
+
+ toc_offset = rlc_autoload_info[id].offset;
+ toc_fw_size = rlc_autoload_info[id].size;
+
+ if (fw_size == 0)
+ fw_size = toc_fw_size;
+
+ if (fw_size > toc_fw_size)
+ fw_size = toc_fw_size;
+
+ memcpy(ptr + toc_offset, fw_data, fw_size);
+
+ if (fw_size < toc_fw_size)
+ memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
+
+ if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
+ *(uint64_t *)fw_autoload_mask |= 1ULL << id;
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ void *data;
+ uint32_t size;
+ uint64_t *toc_ptr;
+
+ *(uint64_t *)fw_autoload_mask |= 0x1;
+
+ DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
+
+ data = adev->psp.toc.start_addr;
+ size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
+
+ toc_ptr = (uint64_t *)data + size / 8 - 1;
+ *toc_ptr = *(uint64_t *)fw_autoload_mask;
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
+ data, size, fw_autoload_mask);
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
+ uint16_t version_major, version_minor;
+
+ if (adev->gfx.rs64_enable) {
+ /* pfp ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ /* me ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ /* mec ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ } else {
+ /* pfp ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
+ fw_data, fw_size, fw_autoload_mask);
+
+ /* me ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
+ fw_data, fw_size, fw_autoload_mask);
+
+ /* mec ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ cp_hdr->jt_size * 4;
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
+ fw_data, fw_size, fw_autoload_mask);
+ }
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size, fw_autoload_mask);
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2) {
+ if (version_minor >= 2) {
+ rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
+ fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
+ fw_data, fw_size, fw_autoload_mask);
+ }
+ }
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr;
+
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
+ adev->sdma.instance[0].fw->data;
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->ctl_ucode_offset));
+ fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ unsigned fw_size;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ int pipe, ucode_id, data_id;
+
+ for (pipe = 0; pipe < 2; pipe++) {
+ if (pipe==0) {
+ ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
+ data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
+ } else {
+ ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
+ data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
+ }
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ ucode_id, fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ data_id, fw_data, fw_size, fw_autoload_mask);
+ }
+}
+
+static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size;
+ uint64_t gpu_addr;
+ uint32_t autoload_fw_id[2];
+
+ memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
+
+ /* RLC autoload sequence 2: copy ucode */
+ gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
+
+ rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
+
+ /* RLC autoload sequence 3: load IMU fw */
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ /* RLC autoload sequence 4 init IMU fw */
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+
+ /* RLC autoload sequence 5 disable gpa mode */
+ gfx_v11_0_disable_gpa_mode(adev);
+
+ return 0;
+}
+
+static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
+}
+
+static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 2;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2420 &&
+ adev->gfx.pfp_fw_version >= 2580 &&
+ adev->gfx.mec_fw_version >= 2650 &&
+ adev->mes.fw_version[0] >= 120) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* add firmware version checks here */
+ if (0 && !adev->gfx.disable_uq) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 2280 &&
+ adev->gfx.pfp_fw_version >= 2370 &&
+ adev->gfx.mec_fw_version >= 2450 &&
+ adev->mes.fw_version[0] >= 99) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.pfp_fw_version >= 102 &&
+ adev->gfx.mec_fw_version >= 66 &&
+ adev->mes.fw_version[0] >= 128) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 26 &&
+ adev->mes.fw_version[0] >= 114) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 12 &&
+ adev->gfx.pfp_fw_version >= 15 &&
+ adev->gfx.mec_fw_version >= 15) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 7 &&
+ adev->gfx.pfp_fw_version >= 8 &&
+ adev->gfx.mec_fw_version >= 8) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
+ amdgpu_sriov_is_pp_one_vf(adev))
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ /* FED error */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
+ &adev->gfx.rlc_gc_fed_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v11_0_me_init(adev);
+
+ r = gfx_v11_0_rlc_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v11_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ if (adev->gfx.num_gfx_rings) {
+ ring_id = 0;
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v11_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v11_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 2280) &&
+ (adev->gfx.mec_fw_version >= 2410) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ }
+
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v11_0_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_gfx_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
+ return -EINVAL;
+ }
+
+ gfx_v11_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+}
+
+static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+}
+
+static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
+ gfx_v11_0_pfp_fini(adev);
+ gfx_v11_0_me_fini(adev);
+ gfx_v11_0_rlc_fini(adev);
+ gfx_v11_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v11_0_rlc_autoload_buffer_fini(adev);
+
+ gfx_v11_0_free_microcode(adev);
+
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
+ return 0;
+}
+
+static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+ gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
+ gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+ CC_GC_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
+ gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+ GC_USER_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
+}
+
+static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+ u32 rb_mask;
+
+ gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+ gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+ CC_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+ GC_USER_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
+}
+
+static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
+{
+ u32 rb_bitmap_per_sa;
+ u32 rb_bitmap_width_per_sa;
+ u32 max_sa;
+ u32 active_sa_bitmap;
+ u32 global_active_rb_bitmap;
+ u32 active_rb_bitmap = 0;
+ u32 i;
+
+ /* query sa bitmap from SA_UNIT_DISABLE registers */
+ active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
+ /* query rb bitmap from RB_BACKEND_DISABLE registers */
+ global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
+
+ /* generate active rb bitmap according to active sa bitmap */
+ max_sa = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
+ for (i = 0; i < max_sa; i++) {
+ if (active_sa_bitmap & (1 << i))
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
+ }
+
+ active_rb_bitmap &= global_active_rb_bitmap;
+ adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+ adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+#define LDS_APP_BASE 0x1
+#define SCRATCH_APP_BASE 0x2
+
+static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ SCRATCH_APP_BASE;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc21_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
+ }
+}
+
+static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ /* TODO: harvest feature to be added later. */
+}
+
+static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
+{
+ /* TCCs are global (not instanced). */
+ uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
+ RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
+
+ adev->gfx.config.tcc_disabled_mask =
+ REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
+ (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
+}
+
+static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v11_0_setup_rb(adev);
+ gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v11_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+ /* Set whether texture coordinate truncation is conformant. */
+ tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
+ adev->gfx.config.ta_cntl2_truncate_coord_mode =
+ REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc21_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v11_0_init_compute_vmid(adev);
+ gfx_v11_0_init_gds_vmid(adev);
+}
+
+static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
+static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
+}
+
+static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+
+ return 0;
+}
+
+static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
+}
+
+static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t rlc_pg_cntl;
+
+ rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (!enable) {
+ /* RLC_PG_CNTL[23] = 0 (default)
+ * RLC will wait for handshake acks with SMU
+ * GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ * RLC will not issue any message to SMU
+ * hence no handshake between SMU & RLC
+ * GFXOFF will be disabled
+ */
+ rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ } else
+ rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
+}
+
+static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
+{
+ /* TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
+}
+
+static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+}
+
+static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
+ WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
+}
+
+static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
+ tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
+}
+
+static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ version_major = le16_to_cpu(hdr->header.header_version_major);
+ version_minor = le16_to_cpu(hdr->header.header_version_minor);
+
+ if (version_major == 2) {
+ gfx_v11_0_load_rlcg_microcode(adev);
+ if (amdgpu_dpm == 1) {
+ if (version_minor >= 2)
+ gfx_v11_0_load_rlc_iram_dram_microcode(adev);
+ if (version_minor == 3)
+ gfx_v11_0_load_rlcp_rlcv_microcode(adev);
+ }
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ gfx_v11_0_init_csb(adev);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v11_0_rlc_enable_srm(adev);
+ } else {
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v11_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v11_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_init_csb(adev);
+
+ adev->gfx.rlc.funcs->start(adev);
+ }
+ return 0;
+}
+
+static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /* Program me ucode address into intruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /* Program pfp ucode address into intruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ /* Program mec1 ucode address into intruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i, pipe_id;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Waiting for cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(addr2));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(addr2));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i, pipe_id;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Waiting for instruction cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(addr2));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(addr2));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc21_grbm_select(adev, 1, i, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+ upper_32_bits(addr2));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ uint32_t pipe_id, tmp;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ /* config pfp program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset pfp pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear pfp pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config me program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset me pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear me pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config mec program start addr */
+ for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+ soc21_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset mec pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+
+ /* clear mec pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+}
+
+static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+ uint32_t cp_status;
+ uint32_t bootload_status;
+ int i, r;
+ uint64_t addr, addr2;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(11, 0, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(11, 0, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
+ bootload_status = RREG32_SOC15(GC, 0,
+ regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
+ else
+ bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+ if ((cp_status == 0) &&
+ (REG_GET_FIELD(bootload_status,
+ RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.rs64_enable) {
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
+ r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
+ r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
+ r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ } else {
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
+ r = gfx_v11_0_config_me_cache(adev, addr);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
+ r = gfx_v11_0_config_pfp_cache(adev, addr);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
+ r = gfx_v11_0_config_mec_cache(adev, addr);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+
+ gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
+
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
+
+ for (i = 0; i < pfp_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
+ le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Waiting for cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
+ gfx_v11_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+
+ gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
+
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
+
+ for (i = 0; i < me_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
+ le32_to_cpup(fw_data + me_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
+
+ /* 64kb align*/
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
+ gfx_v11_0_me_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Waiting for instruction cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_gfx_enable(adev, false);
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
+ return r;
+ }
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load me fw\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+ int ctx_reg_offset;
+
+ /* init the CP */
+ WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
+ adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v11_0_cp_gfx_enable(adev, true);
+
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring, ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ amdgpu_ring_write(ring, ctx_reg_offset);
+ amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+
+ /* submit cs packet to copy state 0 to next available state */
+ if (adev->gfx.num_gfx_rings > 1) {
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+ }
+ return 0;
+}
+
+static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+ CP_PIPE_ID pipe)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+}
+
+static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
+
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
+
+ gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Init gfx ring 1 for pipe 1 */
+ if (adev->gfx.num_gfx_rings > 1) {
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
+
+ gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ /* Switch to pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v11_0_cp_gfx_start(adev);
+
+ return 0;
+}
+
+static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 data;
+
+ if (adev->gfx.rs64_enable) {
+ data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
+ enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
+ } else {
+ data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
+
+ if (enable) {
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
+ if (!adev->enable_mes_kiq)
+ data = REG_SET_FIELD(data, CP_MEC_CNTL,
+ MEC_ME2_HALT, 0);
+ } else {
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
+ }
+
+ udelay(50);
+}
+
+static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 *fw = NULL;
+ int r;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+ gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
+
+ /* MEC1 */
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
+
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ u32 tmp, fw_ucode_size, fw_data_size;
+ u32 i, usec_timeout = 50000; /* Wait for 50 ms */
+ u32 *fw_ucode_ptr, *fw_data_ptr;
+ int r;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw_ucode_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_data_obj,
+ &adev->gfx.mec.mec_fw_data_gpu_addr,
+ (void **)&fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+ memcpy(fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc21_grbm_select(adev, 1, i, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
+}
+
+static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
+{
+ /* set graphics engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.gfx_ring0 * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
+
+ /* set compute engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+}
+
+static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v11_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ bool priority = 0;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
+static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v11_gfx_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0;
+
+ /* set up gfx queue priority */
+ gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
+
+ /* set up time quantum */
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up gfx hqd base. this is similar as CP_RB_BASE */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+ if (!prop->kernel_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
+
+ /* active the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
+ mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
+ if (r)
+ return r;
+
+ return gfx_v11_0_cp_gfx_start(adev);
+}
+
+static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v11_compute_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = prop->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(prop->queue_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
+ if (prop->kernel_queue) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ }
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (prop->use_doorbell) {
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a compute queue/ring */
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+ mqd->cp_hqd_active = prop->hqd_active;
+
+ /* set UQ fenceaddress */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+
+ gfx_v11_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v11_0_kiq_init_register(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ gfx_v11_0_kiq_init_register(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
+{
+ gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
+ return 0;
+}
+
+static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v11_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_gfx_enable_kcq(adev, 0);
+}
+
+static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v11_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v11_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_cp_set_doorbell_range(adev);
+
+ if (amdgpu_async_gfx_ring) {
+ gfx_v11_0_cp_compute_enable(adev, true);
+ gfx_v11_0_cp_gfx_enable(adev, true);
+ }
+
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+ r = amdgpu_mes_kiq_hw_init(adev);
+ else
+ r = gfx_v11_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v11_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v11_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->gfx.disable_kq) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ /* we don't want to set ring->ready */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+ }
+ if (amdgpu_async_gfx_ring)
+ amdgpu_gfx_disable_kgq(adev, 0);
+ } else {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v11_0_cp_gfx_enable(adev, enable);
+ gfx_v11_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ /* TODO investigate why this and the hdp flush above is needed,
+ * are we missing a flush somewhere else? */
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+ return 0;
+}
+
+static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* select RS64 */
+ if (adev->gfx.rs64_enable) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
+ }
+
+ if (amdgpu_emu_mode == 1)
+ msleep(100);
+}
+
+static int get_gb_addr_config(struct amdgpu_device * adev)
+{
+ u32 gb_addr_config;
+
+ gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+ if (gb_addr_config == 0)
+ return -EINVAL;
+
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
+static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
+
+ data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
+ data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
+}
+
+static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.imu.funcs) {
+ /* RLC autoload sequence 1: Program rlc ram */
+ if (adev->gfx.imu.funcs->program_rlc_ram)
+ adev->gfx.imu.funcs->program_rlc_ram(adev);
+ /* rlc autoload firmware */
+ r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ }
+ } else {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+ }
+
+ /* disable gpa mode in backdoor loading */
+ gfx_v11_0_disable_gpa_mode(adev);
+ }
+ }
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+ return r;
+ }
+ }
+
+ adev->gfx.is_poweron = true;
+
+ if(get_gb_addr_config(adev))
+ DRM_WARN("Invalid gb_addr_config !\n");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->gfx.rs64_enable)
+ gfx_v11_0_config_gfx_rs64(adev);
+
+ r = gfx_v11_0_gfxhub_enable(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_emu_mode)
+ gfx_v11_0_init_golden_registers(adev);
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
+ /**
+ * For gfx 11, rlc firmware loading relies on smu firmware is
+ * loaded firstly, so in direct type, it has to load smc ucode
+ * here before rlc.
+ */
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_constants_init(adev);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ gfx_v11_0_select_cp_fw_arch(adev);
+
+ if (adev->nbio.funcs->gc_doorbell_init)
+ adev->nbio.funcs->gc_doorbell_init(adev);
+
+ r = gfx_v11_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * init golden registers and rlc resume may override some registers,
+ * reconfig them here
+ */
+ gfx_v11_0_tcp_harvest(adev);
+
+ r = gfx_v11_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ /* get IMU version from HW if it's not set */
+ if (!adev->gfx.imu_fw_version)
+ adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
+
+ return r;
+}
+
+static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v11_0_set_userq_eop_interrupts(adev, false);
+
+ if (!adev->no_hw_access) {
+ if (amdgpu_async_gfx_ring &&
+ !adev->gfx.disable_kq) {
+ if (amdgpu_gfx_disable_kgq(adev, 0))
+ DRM_ERROR("KGQ disable failed\n");
+ }
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
+ DRM_ERROR("KCQ disable failed\n");
+
+ amdgpu_mes_kiq_hw_fini(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ /* Remove the steps disabling CPG and clearing KIQ position,
+ * so that CP could perform IDLE-SAVE during switch. Those
+ * steps are necessary to avoid a DMAR error in gfx9 but it is
+ * not reproduced on gfx11.
+ */
+ return 0;
+
+ gfx_v11_0_cp_enable(adev, false);
+ gfx_v11_0_enable_gui_idle_interrupt(adev, false);
+
+ adev->gfxhub.funcs->gart_disable(adev);
+
+ adev->gfx.is_poweron = false;
+
+ return 0;
+}
+
+static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v11_0_hw_fini(ip_block);
+}
+
+static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v11_0_hw_init(ip_block);
+}
+
+static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
+{
+ u32 i, tmp, val;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Request with MeId=2, PipeId=0 */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ if (val == tmp)
+ break;
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
+
+ /* unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ int r, i, j, k;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ soc21_grbm_select(adev, i, k, j, 0);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.me.num_me; ++i) {
+ for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ soc21_grbm_select(adev, i, k, j, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ r = gfx_v11_0_request_gfx_index_mutex(adev, true);
+ if (r) {
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+ DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
+ return r;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
+
+ // Read CP_VMID_RESET register three times.
+ // to get sufficient time for GFX_HQD_ACTIVE reach 0
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+
+ /* release the gfx mutex */
+ r = gfx_v11_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+ if (r) {
+ DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
+ !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ printk("Failed to wait all pipes clean\n");
+ return -EINVAL;
+ }
+
+ /********** trigger soft reset ***********/
+ grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_GFX, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 1);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+ /********** exit soft reset ***********/
+ grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CP, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_GFX, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 0);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
+ WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ printk("Failed to wait CP_VMID_RESET to 0\n");
+ return -EINVAL;
+ }
+
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return gfx_v11_0_cp_resume(adev);
+}
+
+static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ int i, r;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ long tmo = msecs_to_jiffies(1000);
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ return false;
+}
+
+static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ /**
+ * GFX soft reset will impact MES, need resume MES when do GFX soft reset
+ */
+ return amdgpu_mes_resume(adev);
+}
+
+static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+ uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
+ clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+ } else {
+ preempt_disable();
+ clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
+ }
+ clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
+
+ return clock;
+}
+
+static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
+
+ adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
+
+ if (adev->gfx.disable_kq) {
+ /* We need one GFX ring temporarily to set up
+ * the clear state.
+ */
+ adev->gfx.num_gfx_rings = 1;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
+
+ gfx_v11_0_set_kiq_pm4_funcs(adev);
+ gfx_v11_0_set_ring_funcs(adev);
+ gfx_v11_0_set_irq_funcs(adev);
+ gfx_v11_0_set_gds_init(adev);
+ gfx_v11_0_set_rlc_funcs(adev);
+ gfx_v11_0_set_mqd_funcs(adev);
+ gfx_v11_0_set_imu_funcs(adev);
+
+ gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v11_0_init_microcode(adev);
+}
+
+static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_cntl;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
+ return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
+}
+
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
+}
+
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
+ /* It is disabled by HW by default */
+ if (enable) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ } else {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ }
+}
+
+static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
+ adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
+
+ data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
+ data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ } else {
+ /* Program RLC_CGCG_CGLS_CTRL */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ }
+}
+
+static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
+
+ gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
+
+ gfx_v11_0_update_repeater_fgcg(adev, enable);
+
+ gfx_v11_0_update_sram_fgcg(adev, enable);
+
+ gfx_v11_0_update_perf_clk(adev, enable);
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
+static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
+{
+ u32 reg, pre_data, data;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
+ else
+ pre_data = RREG32(reg);
+
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ if (ring
+ && amdgpu_sriov_is_pp_one_vf(adev)
+ && (pre_data != data)
+ && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
+ amdgpu_ring_emit_wreg(ring, reg, data);
+ }
+}
+
+static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v11_0_set_safe_mode,
+ .unset_safe_mode = gfx_v11_0_unset_safe_mode,
+ .init = gfx_v11_0_rlc_init,
+ .get_csb_size = gfx_v11_0_get_csb_size,
+ .get_csb_buffer = gfx_v11_0_get_csb_buffer,
+ .resume = gfx_v11_0_rlc_resume,
+ .stop = gfx_v11_0_rlc_stop,
+ .reset = gfx_v11_0_rlc_reset,
+ .start = gfx_v11_0_rlc_start,
+ .update_spm_vmid = gfx_v11_0_update_spm_vmid,
+};
+
+static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
+
+ // Program RLC_PG_DELAY3 for CGPG hysteresis
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v11_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v11_cntl_pg(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ gfx_v11_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_REPEATER_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_PERF_CLK */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ /* gfx11 is 32bit rptr*/
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
+}
+
+static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ /* gfx11 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx11 now */
+ }
+}
+
+static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
+ if (vmid && !ring->adev->gfx.rs64_enable)
+ gfx_v11_0_ring_emit_de_meta(ring,
+ !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
+ }
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
+ PACKET3_RELEASE_MEM_GCR_GLM_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+
+ /* Make sure that we can't skip the SET_Q_MODE packets when the VM
+ * changed in any way.
+ */
+ ring->set_q_mode_offs = 0;
+ ring->set_q_mode_ptr = NULL;
+}
+
+static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+ uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask;
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
+ u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow,
+ int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int offs, end;
+
+ if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
+ return;
+
+ /*
+ * The logic here isn't easy to understand because we need to keep state
+ * accross multiple executions of the function as well as between the
+ * CPU and GPU. The general idea is that the newly written GPU command
+ * has a condition on the previous one and only executed if really
+ * necessary.
+ */
+
+ /*
+ * The dw in the NOP controls if the next SET_Q_MODE packet should be
+ * executed or not. Reserve 64bits just to be on the save side.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
+ offs = ring->wptr & ring->buf_mask;
+
+ /*
+ * We start with skipping the prefix SET_Q_MODE and always executing
+ * the postfix SET_Q_MODE packet. This is changed below with a
+ * WRITE_DATA command when the postfix executed.
+ */
+ amdgpu_ring_write(ring, shadow_va ? 1 : 0);
+ amdgpu_ring_write(ring, 0);
+
+ if (ring->set_q_mode_offs) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += ring->set_q_mode_offs << 2;
+ end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
+ }
+
+ /*
+ * When the postfix SET_Q_MODE packet executes we need to make sure that the
+ * next prefix SET_Q_MODE packet executes as well.
+ */
+ if (!shadow_va) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += offs << 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 0x1);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
+ amdgpu_ring_write(ring, lower_32_bits(shadow_va));
+ amdgpu_ring_write(ring, upper_32_bits(shadow_va));
+ amdgpu_ring_write(ring, lower_32_bits(gds_va));
+ amdgpu_ring_write(ring, upper_32_bits(gds_va));
+ amdgpu_ring_write(ring, lower_32_bits(csa_va));
+ amdgpu_ring_write(ring, upper_32_bits(csa_va));
+ amdgpu_ring_write(ring, shadow_va ?
+ PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
+ amdgpu_ring_write(ring, init_shadow ?
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
+
+ if (ring->set_q_mode_offs)
+ amdgpu_ring_patch_cond_exec(ring, end);
+
+ if (shadow_va) {
+ uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
+
+ /*
+ * If the tokens match try to skip the last postfix SET_Q_MODE
+ * packet to avoid saving/restoring the state all the time.
+ */
+ if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
+ *ring->set_q_mode_ptr = 0;
+
+ ring->set_q_mode_token = token;
+ } else {
+ ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
+ }
+
+ ring->set_q_mode_offs = offs;
+}
+
+static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (adev->enable_mes)
+ return -EINVAL;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_de_ib_state de_payload = {0};
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
+ int cnt;
+
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+
+ cnt = (sizeof(de_payload) >> 2) + 4 - 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
+}
+
+static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+ bool secure)
+{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+}
+
+static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+}
+
+static void
+gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ break;
+ case 1:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ } else {
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
+ }
+ }
+}
+
+static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
+ return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
+
+ return 0;
+}
+
+#if 0
+static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t tmp, target;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
+
+ target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ target += ring->pipe;
+
+ switch (type) {
+ case AMDGPU_CP_KIQ_IRQ_DRIVER0:
+ if (state == AMDGPU_IRQ_STATE_DISABLE) {
+ tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, target, tmp);
+ } else {
+ tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, target, tmp);
+ }
+ break;
+ default:
+ BUG(); /* kiq only support GENERIC2_INT now */
+ break;
+ }
+ return 0;
+}
+#endif
+
+static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully support it.*/
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false;
+}
+
+
+static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
+ * so the pipe reset status relies on the later gfx ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ if (r) {
+
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v11_reset_gfx_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_kgq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kgq\n");
+ return r;
+ }
+
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ if (ring->me == 1) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec1 fw pc: CP_MEC1_INSTR_PNTR */
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe
+ * reset status relies on the compute ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r = 0;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v11_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kcq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_11_0[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "regCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i &&
+ gc_cp_reg_list_11[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0,
+ regCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_11[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ soc21_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_11[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
+static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
+ .name = "gfx_v11_0",
+ .early_init = gfx_v11_0_early_init,
+ .late_init = gfx_v11_0_late_init,
+ .sw_init = gfx_v11_0_sw_init,
+ .sw_fini = gfx_v11_0_sw_fini,
+ .hw_init = gfx_v11_0_hw_init,
+ .hw_fini = gfx_v11_0_hw_fini,
+ .suspend = gfx_v11_0_suspend,
+ .resume = gfx_v11_0_resume,
+ .is_idle = gfx_v11_0_is_idle,
+ .wait_for_idle = gfx_v11_0_wait_for_idle,
+ .soft_reset = gfx_v11_0_soft_reset,
+ .check_soft_reset = gfx_v11_0_check_soft_reset,
+ .post_soft_reset = gfx_v11_0_post_soft_reset,
+ .set_clockgating_state = gfx_v11_0_set_clockgating_state,
+ .set_powergating_state = gfx_v11_0_set_powergating_state,
+ .get_clockgating_state = gfx_v11_0_get_clockgating_state,
+ .dump_ip_state = gfx_v11_ip_dump,
+ .print_ip_state = gfx_v11_ip_print,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 247 maximum if 16 IBs */
+ 5 + /* update_spm_vmid */
+ 5 + /* COND_EXEC */
+ 22 + /* SET_Q_PREEMPTION_MODE */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 4 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 22 + /* SET_Q_PREEMPTION_MODE */
+ 8 + 8 + /* FENCE x2 */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v11_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = gfx_v11_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
+ .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
+ .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v11_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v11_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v11_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 5 + /* update_spm_vmid */
+ 20 + /* gfx_v11_0_ring_emit_gds_switch */
+ 7 + /* gfx_v11_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v11_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v11_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v11_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = gfx_v11_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v11_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v11_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v11_0_ring_emit_gds_switch */
+ 7 + /* gfx_v11_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v11_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v11_0_ring_emit_rreg,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+};
+
+static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
+ .set = gfx_v11_0_set_eop_interrupt_state,
+ .process = gfx_v11_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
+ .set = gfx_v11_0_set_priv_reg_fault_state,
+ .process = gfx_v11_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
+ .set = gfx_v11_0_set_bad_op_fault_state,
+ .process = gfx_v11_0_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
+ .set = gfx_v11_0_set_priv_inst_fault_state,
+ .process = gfx_v11_0_priv_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
+ .process = gfx_v11_0_rlc_gc_fed_irq,
+};
+
+static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
+
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
+
+ adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
+ adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
+
+}
+
+static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ adev->gfx.imu.mode = MISSION_MODE;
+ else
+ adev->gfx.imu.mode = DEBUG_MODE;
+
+ adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
+}
+
+static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
+}
+
+static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
+{
+ unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines;
+
+ adev->gds.gds_size = 0x1000;
+ adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v11_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v11_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v11_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v11_0_compute_mqd_init;
+}
+
+static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 data, wgp_bitmask;
+ data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ wgp_bitmask =
+ amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+
+ return (~data) & wgp_bitmask;
+}
+
+static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
+
+static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap;
+ unsigned disable_masks[8 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ counter = 0;
+ gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 8 && j < 2)
+ gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
+
+ /**
+ * GFX11 could support more than 4 SEs, while the bitmap
+ * in cu_info struct is 4x4 and ioctl interface struct
+ * drm_amdgpu_info_device should keep stable.
+ * So we use last two columns of bitmap to store cu mask for
+ * SEs 4 to 7, the layout of the bitmap is as below:
+ * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
+ * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
+ * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
+ * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
+ * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
+ * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
+ * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
+ * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
+ */
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask)
+ counter++;
+
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ }
+ }
+ gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
new file mode 100644
index 000000000000..157a5c812259
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 dvanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V11_0_H__
+#define __GFX_V11_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block;
+
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
new file mode 100644
index 000000000000..999bb3cc88b7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "soc21.h"
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "gfx_v11_0.h"
+
+
+static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t rlc_status0 = 0, rlc_status1 = 0;
+ struct ras_common_if *ras_if = NULL;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ rlc_status0 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_0));
+ rlc_status1 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_1));
+
+ if (!rlc_status0 && !rlc_status1) {
+ dev_warn(adev->dev, "RLC_GC_FED irq is generated, but rlc_status0 and rlc_status1 are empty!\n");
+ return 0;
+ }
+
+ /* Use RLC_RLCS_FED_STATUS_0/1 to distinguish FED error block. */
+ if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+ REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR))
+ ras_if = adev->sdma.ras_if;
+ else
+ ras_if = adev->gfx.ras_if;
+
+ if (!ras_if) {
+ dev_err(adev->dev, "Gfx or sdma ras block not initialized, rlc_status0:0x%x.\n",
+ rlc_status0);
+ return -EINVAL;
+ }
+
+ dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev, ras_if->block);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ /* Workaround: when vmid and pasid are both zero, trigger gpu reset in KGD. */
+ if (entry && (entry->client_id == SOC21_IH_CLIENTID_GFX) &&
+ (entry->src_id == GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT) &&
+ !entry->vmid && !entry->pasid) {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t rlc_status0 = 0;
+
+ rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0);
+
+ if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+ REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR)) {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+
+ if (con && !amdgpu_ras_is_rma(adev))
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ return 0;
+}
+
+struct amdgpu_gfx_ras gfx_v11_0_3_ras = {
+ .rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq,
+ .poison_consumption_handler = gfx_v11_0_3_poison_consumption_handler,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h
new file mode 100644
index 000000000000..672c7920b3d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V11_0_3_H__
+#define __GFX_V11_0_3_H__
+
+extern struct amdgpu_gfx_ras gfx_v11_0_3_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
new file mode 100644
index 000000000000..9b90b66368c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for complitaion change this to main and lower shader main to main_1
+
+// Navi3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+shader main
+ asic(GFX11)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+// Takes about 2500 clocks to run.
+// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+
+ //
+ // CLEAR VGPRs
+ //
+ s_mov_b32 m0, 0x00000058 // Loop 96/8=12 times (loop unrolled for performance)
+
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_sub_u32 m0, m0, 8
+ s_cbranch_scc0 label_0005
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
new file mode 100644
index 000000000000..3218cc04f543
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_11_0_3 */
+static const u32 gfx_11_0_3_cleaner_shader_hex[] = {
+ 0xb0804006, 0xbe8200ff,
+ 0x00000058, 0xbefd0080,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefd0002, 0x80828802,
+ 0xbfa1fff5, 0xbe8200ff,
+ 0x80000000, 0x8b020002,
+ 0xbfa10012, 0xbefe00c1,
+ 0xbeff00c1, 0xd71f0001,
+ 0x0001007f, 0xd7200001,
+ 0x0002027e, 0x16020288,
+ 0xbe8200bf, 0xbefd00c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd7006a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbfa1fff7, 0xbefd00ff,
+ 0x00000068, 0xbe804280,
+ 0xbe814280, 0xbe824280,
+ 0xbe834280, 0x80fd847d,
+ 0xbfa1fffa, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbfb00000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
new file mode 100644
index 000000000000..d01d2712cf57
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -0,0 +1,5793 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "imu_v12_0.h"
+#include "soc24.h"
+#include "nvd.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
+
+#include "soc15.h"
+#include "clearstate_gfx12.h"
+#include "v12_structs.h"
+#include "gfx_v12_0.h"
+#include "nbif_v6_3_1.h"
+#include "mes_v12_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define GFX12_NUM_GFX_RINGS 1
+#define GFX12_MEC_HPD_SIZE 2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
+
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000),
+};
+
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+
+static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0);
+}
+
+static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t me = 0, eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ me = 1;
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ me = 0;
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ me = 2;
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((me)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid,
+ uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v12_0_kiq_set_resources,
+ .kiq_map_queues = gfx_v12_0_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
+ .kiq_query_status = gfx_v12_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs;
+}
+
+static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref,
+ uint32_t mask, uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+ gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+ } else {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ }
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t *cpu_ptr;
+ long r;
+
+ /* MES KIQ fw hasn't indirect buffer support for now */
+ if (adev->enable_mes_kiq &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ return 0;
+
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+err2:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+ const struct psp_firmware_header_v1_0 *toc_hdr;
+ int err = 0;
+
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", ucode_prefix);
+ if (err)
+ goto out;
+
+ toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+ adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+ adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+ adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+ le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.toc_fw);
+ return err;
+}
+
+static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
+{
+ char ucode_prefix[30];
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", ucode_prefix);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", ucode_prefix);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", ucode_prefix);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
+ goto out;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", ucode_prefix);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
+
+ /* only one MEC for gfx 12 */
+ adev->gfx.mec2_fw = NULL;
+
+ if (adev->gfx.imu.funcs) {
+ if (adev->gfx.imu.funcs->init_microcode) {
+ err = adev->gfx.imu.funcs->init_microcode(adev);
+ if (err)
+ dev_err(adev->dev, "Failed to load imu firmware!\n");
+ }
+ }
+
+out:
+ if (err) {
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ }
+
+ return err;
+}
+
+static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ count += 1;
+
+ for (sect = gfx12_cs_data; sect->section != NULL; ++sect) {
+ if (sect->id == SECT_CONTEXT) {
+ for (ext = sect->section; ext->extent != NULL; ++ext)
+ count += 2 + ext->reg_count;
+ } else
+ return 0;
+ }
+
+ return count;
+}
+
+static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+{
+ u32 count = 0, clustercount = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ count += 1;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ if (sect->id == SECT_CONTEXT) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ clustercount++;
+ buffer[count++] = ext->reg_count;
+ buffer[count++] = ext->reg_index;
+
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ }
+ } else
+ return;
+ }
+
+ buffer[0] = clustercount;
+}
+
+static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx12_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+ return 0;
+}
+
+static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+static void gfx_v12_0_me_init(struct amdgpu_device *adev)
+{
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ size_t mec_hpd_size;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
+
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v12_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ return 0;
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
+ uint32_t xcc_id,
+ uint32_t simd, uint32_t wave,
+ uint32_t *dst, int *no_fields)
+{
+ /* in gfx12 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here */
+ WARN_ON(simd != 0);
+
+ /* type 4 wave data */
+ dst[(*no_fields)++] = 4;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE);
+}
+
+static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
+ uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ WARN_ON(simd != 0);
+
+ wave_read_regs(
+ adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+ dst);
+}
+
+static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
+ uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc24_grbm_select(adev, me, pipe, q, vm);
+}
+
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+ return 0;
+ }
+
+ memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+ return -EINVAL;
+}
+
+static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v12_0_select_se_sh,
+ .read_wave_data = &gfx_v12_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v12_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
+ .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info,
+};
+
+static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
+{
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+ int me, int pipe, int queue)
+{
+ int r;
+ struct amdgpu_ring *ring;
+ unsigned int irq_type;
+
+ ring = &adev->gfx.gfx_ring[ring_id];
+
+ ring->me = me;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ if (!ring_id)
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX12_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static struct {
+ SOC24_FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+ unsigned int size_x16;
+} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
+
+#define RLC_TOC_OFFSET_DWUNIT 8
+#define RLC_SIZE_MULTIPLE 1024
+#define RLC_TOC_UMF_SIZE_inM 23ULL
+#define RLC_TOC_FORMAT_API 165ULL
+
+static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+ RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
+
+ while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
+ rlc_autoload_info[ucode->id].id = ucode->id;
+ rlc_autoload_info[ucode->id].offset =
+ ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
+ rlc_autoload_info[ucode->id].size =
+ ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
+ ucode->size * 4;
+ ucode++;
+ }
+}
+
+static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+ uint32_t total_size = 0;
+ SOC24_FIRMWARE_ID id;
+
+ gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+ for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++)
+ total_size += rlc_autoload_info[id].size;
+
+ /* In case the offset in rlc toc ucode is aligned */
+ if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset)
+ total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
+ rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
+ if (total_size < (RLC_TOC_UMF_SIZE_inM << 20))
+ total_size = RLC_TOC_UMF_SIZE_inM << 20;
+
+ return total_size;
+}
+
+static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t total_size;
+
+ total_size = gfx_v12_0_calc_toc_total_size(adev);
+
+ r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ SOC24_FIRMWARE_ID id,
+ const void *fw_data,
+ uint32_t fw_size)
+{
+ uint32_t toc_offset;
+ uint32_t toc_fw_size;
+ char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+
+ if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
+ return;
+
+ toc_offset = rlc_autoload_info[id].offset;
+ toc_fw_size = rlc_autoload_info[id].size;
+
+ if (fw_size == 0)
+ fw_size = toc_fw_size;
+
+ if (fw_size > toc_fw_size)
+ fw_size = toc_fw_size;
+
+ memcpy(ptr + toc_offset, fw_data, fw_size);
+
+ if (fw_size < toc_fw_size)
+ memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
+{
+ void *data;
+ uint32_t size;
+ uint32_t *toc_ptr;
+
+ data = adev->psp.toc.start_addr;
+ size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
+
+ toc_ptr = (uint32_t *)data + size / 4 - 2;
+ *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
+ data, size);
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
+ const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
+ uint16_t version_major, version_minor;
+
+ /* pfp ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK,
+ fw_data, fw_size);
+ /* me ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK,
+ fw_data, fw_size);
+ /* mec ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
+ fw_data, fw_size);
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size);
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2) {
+ if (version_minor >= 1) {
+ rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
+ fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
+ fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
+ fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
+ fw_data, fw_size);
+ }
+ if (version_minor >= 2) {
+ rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
+ fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
+ fw_data, fw_size);
+ }
+ }
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v3_0 *sdma_hdr;
+
+ sdma_hdr = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes);
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
+ fw_data, fw_size);
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ unsigned fw_size;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ int pipe, ucode_id, data_id;
+
+ for (pipe = 0; pipe < 2; pipe++) {
+ if (pipe == 0) {
+ ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0;
+ data_id = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK;
+ } else {
+ ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1;
+ data_id = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK;
+ }
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size);
+ }
+}
+
+static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size;
+ uint64_t gpu_addr;
+ uint32_t data;
+
+ /* RLC autoload sequence 2: copy ucode */
+ gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
+
+ rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
+
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ /* RLC autoload sequence 3: load IMU fw */
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ /* RLC autoload sequence 4 init IMU fw */
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+
+ /* RLC autoload sequence 5 disable gpa mode */
+ gfx_v12_0_disable_gpa_mode(adev);
+ } else {
+ /* unhalt rlc to start autoload without imu */
+ data = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
+ data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
+ data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, data);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
+}
+
+static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id = 0;
+ unsigned num_compute_rings;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 2;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2780 &&
+ adev->gfx.pfp_fw_version >= 2840 &&
+ adev->gfx.mec_fw_version >= 3050 &&
+ adev->mes.fw_version[0] >= 123) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (adev->gfx.me_fw_version >= 2480 &&
+ adev->gfx.pfp_fw_version >= 2530 &&
+ adev->gfx.mec_fw_version >= 2680 &&
+ adev->mes.fw_version[0] >= 100)
+ adev->gfx.enable_cleaner_shader = true;
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ /* recalculate compute rings to use based on hardware configuration */
+ num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe) / 2;
+ adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+ }
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v12_0_me_init(adev);
+
+ r = gfx_v12_0_rlc_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v12_0_mec_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ if (adev->gfx.num_gfx_rings) {
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v12_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev,
+ 0, i, k, j))
+ continue;
+
+ r = gfx_v12_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if ((adev->gfx.me_fw_version >= 2660) &&
+ (adev->gfx.mec_fw_version >= 2920) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
+ if (r) {
+ dev_err(adev->dev, "Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v12_0_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ gfx_v12_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+}
+
+static void gfx_v12_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+}
+
+static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ gfx_v12_0_pfp_fini(adev);
+ gfx_v12_0_me_fini(adev);
+ gfx_v12_0_rlc_fini(adev);
+ gfx_v12_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v12_0_rlc_autoload_buffer_fini(adev);
+
+ gfx_v12_0_free_microcode(adev);
+
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
+ return 0;
+}
+
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+ gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE);
+ gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+ GRBM_CC_GC_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE);
+ gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+ GRBM_GC_USER_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
+}
+
+static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+ u32 rb_mask;
+
+ gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+ gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+ CC_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+ GC_USER_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
+}
+
+static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
+{
+ u32 rb_bitmap_per_sa;
+ u32 rb_bitmap_width_per_sa;
+ u32 max_sa;
+ u32 active_sa_bitmap;
+ u32 global_active_rb_bitmap;
+ u32 active_rb_bitmap = 0;
+ u32 i;
+
+ /* query sa bitmap from SA_UNIT_DISABLE registers */
+ active_sa_bitmap = gfx_v12_0_get_sa_active_bitmap(adev);
+ /* query rb bitmap from RB_BACKEND_DISABLE registers */
+ global_active_rb_bitmap = gfx_v12_0_get_rb_active_bitmap(adev);
+
+ /* generate active rb bitmap according to active sa bitmap */
+ max_sa = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
+ for (i = 0; i < max_sa; i++) {
+ if (active_sa_bitmap & (1 << i))
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
+ }
+
+ active_rb_bitmap &= global_active_rb_bitmap;
+ adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+ adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+}
+
+#define LDS_APP_BASE 0x1
+#define SCRATCH_APP_BASE 0x2
+
+static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ SCRATCH_APP_BASE;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc24_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ /* TODO: harvest feature to be added later. */
+}
+
+static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev)
+{
+}
+
+static void gfx_v12_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v12_0_setup_rb(adev);
+ gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v12_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc24_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v12_0_init_compute_vmid(adev);
+}
+
+static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
+static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
+}
+
+static int gfx_v12_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+
+ return 0;
+}
+
+static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
+}
+
+static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t rlc_pg_cntl;
+
+ rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (!enable) {
+ /* RLC_PG_CNTL[23] = 0 (default)
+ * RLC will wait for handshake acks with SMU
+ * GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ * RLC will not issue any message to SMU
+ * hence no handshake between SMU & RLC
+ * GFXOFF will be disabled
+ */
+ rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ } else
+ rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
+}
+
+static void gfx_v12_0_rlc_start(struct amdgpu_device *adev)
+{
+ /* TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v12_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
+}
+
+static void gfx_v12_0_load_rlcg_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+}
+
+static void gfx_v12_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
+ WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
+}
+
+static int gfx_v12_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ version_major = le16_to_cpu(hdr->header.header_version_major);
+ version_minor = le16_to_cpu(hdr->header.header_version_minor);
+
+ if (version_major == 2) {
+ gfx_v12_0_load_rlcg_microcode(adev);
+ if (amdgpu_dpm == 1) {
+ if (version_minor >= 2)
+ gfx_v12_0_load_rlc_iram_dram_microcode(adev);
+ }
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int gfx_v12_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ gfx_v12_0_init_csb(adev);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v12_0_rlc_enable_srm(adev);
+ } else {
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v12_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v12_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_init_csb(adev);
+
+ adev->gfx.rlc.funcs->start(adev);
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_config_gfx_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ uint32_t pipe_id, tmp;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ /* config pfp program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset pfp pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear pfp pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config me program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset me pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear me pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config mec program start addr */
+ for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+ soc24_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset mec pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+
+ /* clear mec pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+}
+
+static void gfx_v12_0_set_pfp_ucode_start_addr(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *cp_hdr;
+ unsigned pipe_id, tmp;
+
+ cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (cp_hdr->ucode_start_addr_hi << 30) |
+ (cp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ cp_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v12_0_set_me_ucode_start_addr(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *cp_hdr;
+ unsigned pipe_id, tmp;
+
+ cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (cp_hdr->ucode_start_addr_hi << 30) |
+ (cp_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ cp_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_ME_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v12_0_set_mec_ucode_start_addr(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *cp_hdr;
+ unsigned pipe_id;
+
+ cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) {
+ soc24_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ cp_hdr->ucode_start_addr_lo >> 2 |
+ cp_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ cp_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+ uint32_t cp_status;
+ uint32_t bootload_status;
+ int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
+ bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+ if ((cp_status == 0) &&
+ (REG_GET_FIELD(bootload_status,
+ RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+ break;
+ }
+ udelay(1);
+ if (amdgpu_emu_mode)
+ msleep(10);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ gfx_v12_0_set_pfp_ucode_start_addr(adev);
+ gfx_v12_0_set_me_ucode_start_addr(adev);
+ gfx_v12_0_set_mec_ucode_start_addr(adev);
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
+ gfx_v12_0_pfp_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
+ gfx_v12_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Waiting for cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ gfx_v12_0_set_pfp_ucode_start_addr(adev);
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
+
+ /* 64kb align*/
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
+ gfx_v12_0_me_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
+ gfx_v12_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Waiting for instruction cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ gfx_v12_0_set_me_ucode_start_addr(adev);
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
+ return -EINVAL;
+
+ gfx_v12_0_cp_gfx_enable(adev, false);
+
+ r = gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
+ return r;
+ }
+
+ r = gfx_v12_0_cp_gfx_load_me_microcode_rs64(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load me fw\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ /* init the CP */
+ WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
+ adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v12_0_cp_gfx_enable(adev, true);
+
+ return 0;
+}
+
+static void gfx_v12_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+ CP_PIPE_ID pipe)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+}
+
+static void gfx_v12_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
+
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
+
+ gfx_v12_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Switch to pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v12_0_cp_gfx_start(adev);
+ return 0;
+}
+
+static void gfx_v12_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 data;
+
+ data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
+ enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
+
+ adev->gfx.kiq[0].ring.sched.ready = enable;
+
+ udelay(50);
+}
+
+static int gfx_v12_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ u32 tmp, fw_ucode_size, fw_data_size;
+ u32 i, usec_timeout = 50000; /* Wait for 50 ms */
+ u32 *fw_ucode_ptr, *fw_data_ptr;
+ int r;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v12_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw_ucode_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v12_0_mec_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev,
+ ALIGN(fw_data_size, 64 * 1024) *
+ adev->gfx.mec.num_pipe_per_mec,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.mec_fw_data_obj,
+ &adev->gfx.mec.mec_fw_data_gpu_addr,
+ (void **)&fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v12_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ memcpy(fw_data_ptr + i * ALIGN(fw_data_size, 64 * 1024) / 4, fw_data, fw_data_size);
+ }
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc24_grbm_select(adev, 1, i, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
+ lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+ i * ALIGN(fw_data_size, 64 * 1024)));
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+ i * ALIGN(fw_data_size, 64 * 1024)));
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+ lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ gfx_v12_0_set_mec_ucode_start_addr(adev);
+
+ return 0;
+}
+
+static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
+}
+
+static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
+{
+ /* set graphics engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.gfx_ring0 * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
+
+ /* set compute engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+}
+
+static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v12_gfx_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority */
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+
+ /* set up time quantum */
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up gfx hqd base. this is similar as CP_RB_BASE */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+ if (!prop->kernel_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
+
+ /* active the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ r = gfx_v12_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
+ if (r)
+ return r;
+
+ return gfx_v12_0_cp_gfx_start(adev);
+}
+
+static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v12_compute_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = prop->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(prop->queue_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ if (prop->kernel_queue) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ }
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (prop->use_doorbell) {
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a compute queue/ring */
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+ mqd->cp_hqd_active = prop->hqd_active;
+
+ /* set UQ fenceaddress */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static int gfx_v12_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
+
+ gfx_v12_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v12_0_kiq_init_register(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ gfx_v12_0_kiq_init_register(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev)
+{
+ gfx_v12_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
+ adev->gfx.kiq[0].ring.sched.ready = true;
+ return 0;
+}
+
+static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v12_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ r = gfx_v12_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_gfx_enable_kcq(adev, 0);
+}
+
+static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v12_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v12_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v12_0_cp_compute_load_microcode_rs64(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_cp_set_doorbell_range(adev);
+
+ if (amdgpu_async_gfx_ring) {
+ gfx_v12_0_cp_compute_enable(adev, true);
+ gfx_v12_0_cp_gfx_enable(adev, true);
+ }
+
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+ r = amdgpu_mes_kiq_hw_init(adev);
+ else
+ r = gfx_v12_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v12_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v12_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v12_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v12_0_cp_gfx_enable(adev, enable);
+ gfx_v12_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ /* TODO investigate why this and the hdp flush above is needed,
+ * are we missing a flush somewhere else? */
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+ return 0;
+}
+
+static int get_gb_addr_config(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+ if (gb_addr_config == 0)
+ return -EINVAL;
+
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
+
+ data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
+ data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
+}
+
+static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_12_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
+
+ if (adev->rev_id == 0)
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_12_0_rev0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0));
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ /* RLC autoload sequence 1: Program rlc ram */
+ if (adev->gfx.imu.funcs->program_rlc_ram)
+ adev->gfx.imu.funcs->program_rlc_ram(adev);
+ }
+ /* rlc autoload firmware */
+ r = gfx_v12_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ } else {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+ }
+
+ /* disable gpa mode in backdoor loading */
+ gfx_v12_0_disable_gpa_mode(adev);
+ }
+ }
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = gfx_v12_0_wait_for_rlc_autoload_complete(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+ return r;
+ }
+ }
+
+ if (!amdgpu_emu_mode)
+ gfx_v12_0_init_golden_registers(adev);
+
+ adev->gfx.is_poweron = true;
+
+ if (get_gb_addr_config(adev))
+ DRM_WARN("Invalid gb_addr_config !\n");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ gfx_v12_0_config_gfx_rs64(adev);
+
+ r = gfx_v12_0_gfxhub_enable(adev);
+ if (r)
+ return r;
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT ||
+ adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) &&
+ (amdgpu_dpm == 1)) {
+ /**
+ * For gfx 12, rlc firmware loading relies on smu firmware is
+ * loaded firstly, so in direct type, it has to load smc ucode
+ * here before rlc.
+ */
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_constants_init(adev);
+
+ if (adev->nbio.funcs->gc_doorbell_init)
+ adev->nbio.funcs->gc_doorbell_init(adev);
+
+ r = gfx_v12_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * init golden registers and rlc resume may override some registers,
+ * reconfig them here
+ */
+ gfx_v12_0_tcp_harvest(adev);
+
+ r = gfx_v12_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t tmp;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v12_0_set_userq_eop_interrupts(adev, false);
+
+ if (!adev->no_hw_access) {
+ if (amdgpu_async_gfx_ring) {
+ if (amdgpu_gfx_disable_kgq(adev, 0))
+ DRM_ERROR("KGQ disable failed\n");
+ }
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
+ DRM_ERROR("KCQ disable failed\n");
+
+ amdgpu_mes_kiq_hw_fini(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v12_0_cp_gfx_enable(adev, false);
+ /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+
+ return 0;
+ }
+ gfx_v12_0_cp_enable(adev, false);
+ gfx_v12_0_enable_gui_idle_interrupt(adev, false);
+
+ adev->gfxhub.funcs->gart_disable(adev);
+
+ adev->gfx.is_poweron = false;
+
+ return 0;
+}
+
+static int gfx_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v12_0_hw_fini(ip_block);
+}
+
+static int gfx_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v12_0_hw_init(ip_block);
+}
+
+static bool gfx_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock = 0;
+
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_gpu_clock_counter)
+ clock = adev->smuio.funcs->get_gpu_clock_counter(adev);
+ else
+ dev_warn(adev->dev, "query gpu clock counter is not supported\n");
+
+ return clock;
+}
+
+static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
+
+ adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
+
+ if (adev->gfx.disable_kq) {
+ adev->gfx.num_gfx_rings = 0;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
+
+ gfx_v12_0_set_kiq_pm4_funcs(adev);
+ gfx_v12_0_set_ring_funcs(adev);
+ gfx_v12_0_set_irq_funcs(adev);
+ gfx_v12_0_set_rlc_funcs(adev);
+ gfx_v12_0_set_mqd_funcs(adev);
+ gfx_v12_0_set_imu_funcs(adev);
+
+ gfx_v12_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v12_0_init_microcode(adev);
+}
+
+static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ r = gfx_v12_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static bool gfx_v12_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_cntl;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
+ return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
+}
+
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
+}
+
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ u32 reg, data;
+
+ reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(reg);
+ else
+ data = RREG32(reg);
+
+ data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+
+ if (ring
+ && amdgpu_sriov_is_pp_one_vf(adev)
+ && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
+ uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ amdgpu_ring_emit_wreg(ring, reg, data);
+ }
+}
+
+static const struct amdgpu_rlc_funcs gfx_v12_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v12_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v12_0_set_safe_mode,
+ .unset_safe_mode = gfx_v12_0_unset_safe_mode,
+ .init = gfx_v12_0_rlc_init,
+ .get_csb_size = gfx_v12_0_get_csb_size,
+ .get_csb_buffer = gfx_v12_0_get_csb_buffer,
+ .resume = gfx_v12_0_rlc_resume,
+ .stop = gfx_v12_0_rlc_stop,
+ .reset = gfx_v12_0_rlc_reset,
+ .start = gfx_v12_0_rlc_start,
+ .update_spm_vmid = gfx_v12_0_update_spm_vmid,
+};
+
+#if 0
+static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO */
+}
+
+static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO */
+}
+#endif
+
+static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
+ adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
+
+ data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
+ data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ } else {
+ /* Program RLC_CGCG_CGLS_CTRL */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+ }
+}
+
+static void gfx_v12_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
+ /* It is disabled by HW by default */
+ if (enable) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ } else {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ }
+}
+
+static void gfx_v12_0_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK);
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v12_0_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v12_0_update_coarse_grain_clock_gating(adev, enable);
+
+ gfx_v12_0_update_medium_grain_clock_gating(adev, enable);
+
+ gfx_v12_0_update_repeater_fgcg(adev, enable);
+
+ gfx_v12_0_update_sram_fgcg(adev, enable);
+
+ gfx_v12_0_update_perf_clk(adev, enable);
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v12_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
+static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ gfx_v12_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_REPEATER_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_PERF_CLK */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v12_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ /* gfx12 is 32bit rptr*/
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
+}
+
+static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ /* gfx12 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx12 now */
+ }
+}
+
+static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v12_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v12_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+ uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static unsigned gfx_v12_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask;
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (adev->enable_mes)
+ return -EINVAL;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring,
+ bool start,
+ bool secure)
+{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+}
+
+static void gfx_v12_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v12_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v12_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ gfx_v12_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+}
+
+static void
+gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v12_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v12_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ } else {
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
+ }
+ }
+}
+
+static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static void gfx_v12_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v12_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_12_0[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_12[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_12[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_12[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ soc24_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_12[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully support it.*/
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false;
+}
+
+static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* Sometimes the ME start pc counter can't cache correctly, so the
+ * PC check only as a reference and pipe reset result rely on the
+ * later ring test.
+ */
+ return 0;
+}
+
+static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ if (r) {
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v12_reset_gfx_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_kgq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kgq\n");
+ return r;
+ }
+
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r = 0;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ /* Doesn't find the F32 MEC instruction pointer register, and suppose
+ * the driver won't run into the F32 mode.
+ */
+ }
+
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* Need the ring test to verify the pipe reset result.*/
+ return 0;
+}
+
+static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v12_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kcq\n");
+ return r;
+ }
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kcq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v12_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
+static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
+ .name = "gfx_v12_0",
+ .early_init = gfx_v12_0_early_init,
+ .late_init = gfx_v12_0_late_init,
+ .sw_init = gfx_v12_0_sw_init,
+ .sw_fini = gfx_v12_0_sw_fini,
+ .hw_init = gfx_v12_0_hw_init,
+ .hw_fini = gfx_v12_0_hw_fini,
+ .suspend = gfx_v12_0_suspend,
+ .resume = gfx_v12_0_resume,
+ .is_idle = gfx_v12_0_is_idle,
+ .wait_for_idle = gfx_v12_0_wait_for_idle,
+ .set_clockgating_state = gfx_v12_0_set_clockgating_state,
+ .set_powergating_state = gfx_v12_0_set_powergating_state,
+ .get_clockgating_state = gfx_v12_0_get_clockgating_state,
+ .dump_ip_state = gfx_v12_ip_dump,
+ .print_ip_state = gfx_v12_ip_print,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v12_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v12_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v12_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v12_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = gfx_v12_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v12_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v12_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_0_emit_mem_sync,
+ .reset = gfx_v12_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = gfx_v12_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_0_emit_mem_sync,
+ .reset = gfx_v12_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v12_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v12_0_ring_emit_rreg,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+};
+
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v12_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v12_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v12_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_eop_irq_funcs = {
+ .set = gfx_v12_0_set_eop_interrupt_state,
+ .process = gfx_v12_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = {
+ .set = gfx_v12_0_set_priv_reg_fault_state,
+ .process = gfx_v12_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = {
+ .set = gfx_v12_0_set_bad_op_fault_state,
+ .process = gfx_v12_0_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = {
+ .set = gfx_v12_0_set_priv_inst_fault_state,
+ .process = gfx_v12_0_priv_inst_irq,
+};
+
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v12_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs;
+
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ adev->gfx.imu.mode = MISSION_MODE;
+ else
+ adev->gfx.imu.mode = DEBUG_MODE;
+
+ adev->gfx.imu.funcs = &gfx_v12_0_imu_funcs;
+}
+
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v12_0_rlc_funcs;
+}
+
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v12_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v12_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v12_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v12_0_compute_mqd_init;
+}
+
+static void gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 data, wgp_bitmask;
+ data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ wgp_bitmask =
+ amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+
+ return (~data) & wgp_bitmask;
+}
+
+static u32 gfx_v12_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v12_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
+
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap;
+ unsigned disable_masks[8 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v12_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ counter = 0;
+ gfx_v12_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 8 && j < 2)
+ gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v12_0_get_cu_active_bitmap_per_sh(adev);
+
+ /**
+ * GFX12 could support more than 4 SEs, while the bitmap
+ * in cu_info struct is 4x4 and ioctl interface struct
+ * drm_amdgpu_info_device should keep stable.
+ * So we use last two columns of bitmap to store cu mask for
+ * SEs 4 to 7, the layout of the bitmap is as below:
+ * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
+ * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
+ * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
+ * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
+ * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
+ * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
+ * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
+ * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
+ */
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask)
+ counter++;
+
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ }
+ }
+ gfx_v12_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
new file mode 100644
index 000000000000..f7184b2dc4e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 dvanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V12_0_H__
+#define __GFX_V12_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block;
+
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index ca74638dec9b..80565392313f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -28,19 +28,33 @@
#include "amdgpu_gfx.h"
#include "amdgpu_ucode.h"
#include "clearstate_si.h"
+#include "si.h"
+#include "sid.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
#include "gmc/gmc_6_0_d.h"
#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
#include "dce/dce_6_0_sh_mask.h"
-#include "gca/gfx_7_2_enum.h"
+
#include "si_enums.h"
-#include "si.h"
+
+#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
+#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
+#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
+
+#define GFX6_NUM_GFX_RINGS 1
+#define GFX6_NUM_COMPUTE_RINGS 2
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -72,7 +86,7 @@ MODULE_FIRMWARE("amdgpu/hainan_ce.bin");
MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");
static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
-static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
//static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev);
static void gfx_v6_0_init_pg(struct amdgpu_device *adev);
@@ -311,7 +325,6 @@ static const u32 verde_rlc_save_restore_register_list[] =
static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
const struct gfx_firmware_header_v1_0 *cp_hdr;
const struct rlc_firmware_header_v1_0 *rlc_hdr;
@@ -337,59 +350,49 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
out:
if (err) {
- pr_err("gfx6: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
+ pr_err("gfx6: Failed to load firmware %s gfx firmware\n", chip_name);
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
}
return err;
}
@@ -1299,7 +1302,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance)
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -1452,12 +1455,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on SI */
- gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
}
/* GRBM_GFX_INDEX has a different offset on SI */
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
@@ -1473,14 +1476,14 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v6_0_get_rb_active_bitmap(adev);
active_rbs |= data <<
((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -1501,7 +1504,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -1510,7 +1513,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG);
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1549,7 +1552,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = RREG32(mmSPI_STATIC_THREAD_MGMT_3);
active_cu = gfx_v6_0_get_cu_enabled(adev);
@@ -1564,7 +1567,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
}
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1732,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
gfx_v6_0_get_cu_info(adev);
gfx_v6_0_config_init(adev);
- WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
- (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
- WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
- (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
+ WREG32(mmCP_QUEUE_THRESHOLDS,
+ ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
+ (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
+
+ /* set HW defaults for 3D engine */
+ WREG32(mmCP_MEQ_THRESHOLDS,
+ (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
+ (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
sx_debug_1 = RREG32(mmSX_DEBUG_1);
WREG32(mmSX_DEBUG_1, sx_debug_1);
@@ -1778,39 +1785,26 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
udelay(50);
}
-
-static void gfx_v6_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_CONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@@ -1818,9 +1812,6 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -1903,50 +1894,42 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- uint32_t scratch;
+ struct amdgpu_ib ib;
uint32_t tmp = 0;
long r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256,
- AMDGPU_IB_POOL_DIRECT, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
- goto err1;
+ return r;
ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
+ ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_CONFIG_REG_START;
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
- goto err2;
+ goto error;
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
r = -ETIMEDOUT;
- goto err2;
+ goto error;
} else if (r < 0) {
- goto err2;
+ goto error;
}
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
-err2:
- amdgpu_ib_free(adev, &ib, NULL);
+error:
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
-err1:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -2117,7 +2100,7 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB0_WPTR, ring->wptr);
/* set the wb address whether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2139,7 +2122,7 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs];
+ return *ring->rptr_cpu_addr;
}
static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
@@ -2203,7 +2186,7 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB1_WPTR, ring->wptr);
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2222,7 +2205,7 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB2_CNTL, tmp | CP_RB2_CNTL__RB_RPTR_WR_ENA_MASK);
ring->wptr = 0;
WREG32(mmCP_RB2_WPTR, ring->wptr);
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB2_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB2_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2371,7 +2354,7 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
u32 dws;
u64 reg_list_mc_addr;
const struct cs_section_def *cs_data;
@@ -2399,7 +2382,8 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
dws = adev->gfx.rlc.clear_state_size + (256 / 4);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
@@ -2428,7 +2412,7 @@ static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
if (!enable) {
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmSPI_LB_CU_MASK, 0x00ff);
}
}
@@ -2871,47 +2855,23 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v6_0_init_pg(struct amdgpu_device *adev)
@@ -3005,7 +2965,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -3027,9 +2987,10 @@ static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -3039,7 +3000,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
DRM_INFO("Not implemented\n");
}
@@ -3060,10 +3021,11 @@ static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
.start = gfx_v6_0_rlc_start
};
-static int gfx_v6_0_early_init(void *handle)
+static int gfx_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
GFX6_NUM_COMPUTE_RINGS);
@@ -3075,10 +3037,10 @@ static int gfx_v6_0_early_init(void *handle)
return 0;
}
-static int gfx_v6_0_sw_init(void *handle)
+static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r;
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -3093,8 +3055,6 @@ static int gfx_v6_0_sw_init(void *handle)
if (r)
return r;
- gfx_v6_0_scratch_init(adev);
-
r = gfx_v6_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@@ -3111,10 +3071,10 @@ static int gfx_v6_0_sw_init(void *handle)
ring = &adev->gfx.gfx_ring[i];
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
- r = amdgpu_ring_init(adev, ring, 1024,
+ r = amdgpu_ring_init(adev, ring, 2048,
&adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -3137,18 +3097,23 @@ static int gfx_v6_0_sw_init(void *handle)
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return r;
}
-static int gfx_v6_0_sw_fini(void *handle)
+static int gfx_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -3160,10 +3125,10 @@ static int gfx_v6_0_sw_fini(void *handle)
return 0;
}
-static int gfx_v6_0_hw_init(void *handle)
+static int gfx_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v6_0_constants_init(adev);
@@ -3180,9 +3145,9 @@ static int gfx_v6_0_hw_init(void *handle)
return r;
}
-static int gfx_v6_0_hw_fini(void *handle)
+static int gfx_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v6_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
@@ -3191,23 +3156,19 @@ static int gfx_v6_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v6_0_suspend(void *handle)
+static int gfx_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v6_0_hw_fini(adev);
+ return gfx_v6_0_hw_fini(ip_block);
}
-static int gfx_v6_0_resume(void *handle)
+static int gfx_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v6_0_hw_init(adev);
+ return gfx_v6_0_hw_init(ip_block);
}
-static bool gfx_v6_0_is_idle(void *handle)
+static bool gfx_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
return false;
@@ -3215,24 +3176,19 @@ static bool gfx_v6_0_is_idle(void *handle)
return true;
}
-static int gfx_v6_0_wait_for_idle(void *handle)
+static int gfx_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v6_0_is_idle(handle))
+ if (gfx_v6_0_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v6_0_soft_reset(void *handle)
-{
- return 0;
-}
-
static void gfx_v6_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -3420,11 +3376,11 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v6_0_set_clockgating_state(void *handle,
+static int gfx_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -3442,11 +3398,11 @@ static int gfx_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v6_0_set_powergating_state(void *handle,
+static int gfx_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
@@ -3482,7 +3438,6 @@ static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring)
static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.name = "gfx_v6_0",
.early_init = gfx_v6_0_early_init,
- .late_init = NULL,
.sw_init = gfx_v6_0_sw_init,
.sw_fini = gfx_v6_0_sw_fini,
.hw_init = gfx_v6_0_hw_init,
@@ -3491,7 +3446,6 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.resume = gfx_v6_0_resume,
.is_idle = gfx_v6_0_is_idle,
.wait_for_idle = gfx_v6_0_wait_for_idle,
- .soft_reset = gfx_v6_0_soft_reset,
.set_clockgating_state = gfx_v6_0_set_clockgating_state,
.set_powergating_state = gfx_v6_0_set_powergating_state,
};
@@ -3609,12 +3563,12 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v6_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v6_0_get_cu_enabled(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
@@ -3631,7 +3585,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a368724c3dfc..2b7aba22ecc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -55,6 +55,9 @@
#define GFX7_NUM_GFX_RINGS 1
#define GFX7_MEC_HPD_SIZE 2048
+#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
+#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
@@ -90,8 +93,7 @@ MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
-static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
-{
+static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = {
{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
@@ -110,8 +112,7 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
-static const u32 spectre_rlc_save_restore_register_list[] =
-{
+static const u32 spectre_rlc_save_restore_register_list[] = {
(0x0e00 << 16) | (0xc12c >> 2),
0x00000000,
(0x0e00 << 16) | (0xc140 >> 2),
@@ -557,8 +558,7 @@ static const u32 spectre_rlc_save_restore_register_list[] =
(0x0e00 << 16) | (0x9600 >> 2),
};
-static const u32 kalindi_rlc_save_restore_register_list[] =
-{
+static const u32 kalindi_rlc_save_restore_register_list[] = {
(0x0e00 << 16) | (0xc12c >> 2),
0x00000000,
(0x0e00 << 16) | (0xc140 >> 2),
@@ -883,10 +883,20 @@ static const u32 kalindi_rlc_save_restore_register_list[] =
};
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
-static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
+static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+}
+
/*
* Core functions
*/
@@ -902,7 +912,6 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -923,92 +932,53 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
case CHIP_MULLINS:
chip_name = "mullins";
break;
- default: BUG();
+ default:
+ BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
if (adev->asic_type == CHIP_KAVERI) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
if (err)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
-
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
out:
if (err) {
- pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
+ pr_err("gfx7: Failed to load firmware %s gfx firmware\n", chip_name);
+ gfx_v7_0_free_microcode(adev);
}
return err;
}
-static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
-{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
-}
-
/**
* gfx_v7_0_tiling_mode_table_init - init the hw tiling table
*
@@ -1582,11 +1552,12 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
* @sh_num: sh block to address
* @instance: Certain registers are instanced per SE or SH.
* 0xffffffff means broadcast to all SEs or SHs (CIK).
- *
+ * @xcc_id: xcc accelerated compute core id
* Select which SE, SH combinations to address.
*/
static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
- u32 se_num, u32 sh_num, u32 instance)
+ u32 se_num, u32 sh_num, u32 instance,
+ int xcc_id)
{
u32 data;
@@ -1766,13 +1737,13 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on CI+ */
- gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
}
/* GRBM_GFX_INDEX has a different offset on CI+ */
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
/**
@@ -1795,13 +1766,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v7_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -1824,7 +1795,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -1835,7 +1806,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG_1);
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1877,7 +1848,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ access. These should be enabled by FW for target VMIDs. */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
@@ -1945,7 +1916,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted
* to all the shaders
*/
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -2049,26 +2020,6 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
udelay(50);
}
-/*
- * GPU scratch registers helpers function.
- */
-/**
- * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
- *
- * @adev: amdgpu_device pointer
- *
- * Set up the number and offset of the CP scratch registers.
- * NOTE: use of CP scratch registers is a legacy inferface and
- * is not used by default on newer asics (r6xx+). On newer asics,
- * memory buffers are used for fences rather than scratch regs.
- */
-static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
/**
* gfx_v7_0_ring_test_ring - basic gfx ring test
*
@@ -2082,41 +2033,33 @@ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
/**
- * gfx_v7_0_ring_emit_hdp - emit an hdp flush on the cp
+ * gfx_v7_0_ring_emit_hdp_flush - emit an hdp flush on the cp
*
* @ring: amdgpu_ring structure holding ring information
*
@@ -2172,7 +2115,7 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
* @seq: sequence number
* @flags: fence related flags
*
- * Emits a fence sequnce number on the gfx ring and flushes
+ * Emits a fence sequence number on the gfx ring and flushes
* GPU caches.
*/
static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
@@ -2180,6 +2123,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+
/* Workaround for cache flush problems. First send a dummy EOP
* event down the pipe with seq one below.
*/
@@ -2199,7 +2144,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+ EVENT_INDEX(5) |
+ (exec ? EOP_EXEC : 0)));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -2215,7 +2161,7 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
* @seq: sequence number
* @flags: fence related flags
*
- * Emits a fence sequnce number on the compute ring and flushes
+ * Emits a fence sequence number on the compute ring and flushes
* GPU caches.
*/
static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
@@ -2242,17 +2188,17 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
* IB stuff
*/
/**
- * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring
+ * gfx_v7_0_ring_emit_ib_gfx - emit an IB (Indirect Buffer) on the ring
*
* @ring: amdgpu_ring structure holding ring information
- * @job: job to retrive vmid from
+ * @job: job to retrieve vmid from
* @ib: amdgpu indirect buffer object
* @flags: options (AMDGPU_HAVE_CTX_SWITCH)
*
* Emits an DE (drawing engine) or CE (constant engine) IB
* on the gfx ring. IBs are usually generated by userspace
* acceleration drivers and submitted to the kernel for
- * sheduling on the ring. This function schedules the IB
+ * scheduling on the ring. This function schedules the IB
* on the gfx ring for execution by the GPU.
*/
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
@@ -2355,54 +2301,46 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- uint32_t scratch;
uint32_t tmp = 0;
long r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256,
- AMDGPU_IB_POOL_DIRECT, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
- goto err1;
+ return r;
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
+ ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START;
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
- goto err2;
+ goto error;
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
r = -ETIMEDOUT;
- goto err2;
+ goto error;
} else if (r < 0) {
- goto err2;
+ goto error;
}
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
-err2:
- amdgpu_ib_free(adev, &ib, NULL);
+error:
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
-err1:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
/*
* CP.
- * On CIK, gfx and compute now have independant command processors.
+ * On CIK, gfx and compute now have independent command processors.
*
* GFX
* Gfx consists of a single ring and can process both gfx jobs and
@@ -2630,8 +2568,8 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2656,7 +2594,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs];
+ return *ring->rptr_cpu_addr;
}
static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -2677,7 +2615,7 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
}
static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@@ -2685,7 +2623,7 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
@@ -2798,7 +2736,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
u32 *hpd;
size_t mec_hpd_size;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -2808,7 +2746,8 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
* GFX7_MEC_HPD_SIZE * 2;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -2827,45 +2766,6 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return 0;
}
-struct hqd_registers
-{
- u32 cp_mqd_base_addr;
- u32 cp_mqd_base_addr_hi;
- u32 cp_hqd_active;
- u32 cp_hqd_vmid;
- u32 cp_hqd_persistent_state;
- u32 cp_hqd_pipe_priority;
- u32 cp_hqd_queue_priority;
- u32 cp_hqd_quantum;
- u32 cp_hqd_pq_base;
- u32 cp_hqd_pq_base_hi;
- u32 cp_hqd_pq_rptr;
- u32 cp_hqd_pq_rptr_report_addr;
- u32 cp_hqd_pq_rptr_report_addr_hi;
- u32 cp_hqd_pq_wptr_poll_addr;
- u32 cp_hqd_pq_wptr_poll_addr_hi;
- u32 cp_hqd_pq_doorbell_control;
- u32 cp_hqd_pq_wptr;
- u32 cp_hqd_pq_control;
- u32 cp_hqd_ib_base_addr;
- u32 cp_hqd_ib_base_addr_hi;
- u32 cp_hqd_ib_rptr;
- u32 cp_hqd_ib_control;
- u32 cp_hqd_iq_timer;
- u32 cp_hqd_iq_rptr;
- u32 cp_hqd_dequeue_request;
- u32 cp_hqd_dma_offload;
- u32 cp_hqd_sema_cmd;
- u32 cp_hqd_msg_type;
- u32 cp_hqd_atomic0_preop_lo;
- u32 cp_hqd_atomic0_preop_hi;
- u32 cp_hqd_atomic1_preop_lo;
- u32 cp_hqd_atomic1_preop_hi;
- u32 cp_hqd_hq_scheduler0;
- u32 cp_hqd_hq_scheduler1;
- u32 cp_mqd_control;
-};
-
static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
int mec, int pipe)
{
@@ -2981,12 +2881,12 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- /* set the wb address wether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
@@ -3196,9 +3096,9 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
}
/**
- * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
+ * gfx_v7_0_ring_emit_pipeline_sync - cik vm flush using the CP
*
- * @ring: the ring to emmit the commands to
+ * @ring: the ring to emit the commands to
*
* Sync the command pipeline with the PFP. E.g. wait for everything
* to be completed.
@@ -3220,7 +3120,7 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 4); /* poll interval */
if (usepfp) {
- /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+ /* sync CE with ME to prevent CE fetch CEIB before context switch done */
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
@@ -3345,7 +3245,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -3370,7 +3270,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -3378,7 +3278,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3430,7 +3330,7 @@ static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
u32 tmp, i, mask;
@@ -3452,7 +3352,7 @@ static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
u32 tmp;
@@ -3543,7 +3443,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(mmRLC_LB_PARAMS, 0x00600408);
WREG32(mmRLC_LB_CNTL, 0x80000004);
@@ -3571,16 +3471,20 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
+ amdgpu_gfx_off_ctrl(adev, false);
+
data = RREG32(mmRLC_SPM_VMID);
data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
WREG32(mmRLC_SPM_VMID, data);
+
+ amdgpu_gfx_off_ctrl(adev, true);
}
static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
@@ -3595,7 +3499,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3649,7 +3553,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3700,7 +3604,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
@@ -3978,70 +3882,24 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
- switch (adev->asic_type) {
- case CHIP_BONAIRE:
- buffer[count++] = cpu_to_le32(0x16000012);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_KAVERI:
- buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_KABINI:
- case CHIP_MULLINS:
- buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_HAWAII:
- buffer[count++] = cpu_to_le32(0x3a00161a);
- buffer[count++] = cpu_to_le32(0x0000002e);
- break;
- default:
- buffer[count++] = cpu_to_le32(0x00000000);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- }
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
@@ -4176,7 +4034,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -4198,9 +4056,10 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -4210,7 +4069,7 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
cik_srbm_select(adev, me, pipe, q, vm);
}
@@ -4238,10 +4097,11 @@ static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
.update_spm_vmid = gfx_v7_0_update_spm_vmid
};
-static int gfx_v7_0_early_init(void *handle)
+static int gfx_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
@@ -4254,9 +4114,9 @@ static int gfx_v7_0_early_init(void *handle)
return 0;
}
-static int gfx_v7_0_late_init(void *handle)
+static int gfx_v7_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4438,7 +4298,7 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
@@ -4446,10 +4306,10 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
return 0;
}
-static int gfx_v7_0_sw_init(void *handle)
+static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j, k, r, ring_id;
switch (adev->asic_type) {
@@ -4484,8 +4344,6 @@ static int gfx_v7_0_sw_init(void *handle)
if (r)
return r;
- gfx_v7_0_scratch_init(adev);
-
r = gfx_v7_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@@ -4512,7 +4370,7 @@ static int gfx_v7_0_sw_init(void *handle)
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -4522,7 +4380,8 @@ static int gfx_v7_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v7_0_compute_ring_init(adev,
@@ -4540,12 +4399,17 @@ static int gfx_v7_0_sw_init(void *handle)
gfx_v7_0_gpu_early_init(adev);
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return r;
}
-static int gfx_v7_0_sw_fini(void *handle)
+static int gfx_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
@@ -4569,10 +4433,10 @@ static int gfx_v7_0_sw_fini(void *handle)
return 0;
}
-static int gfx_v7_0_hw_init(void *handle)
+static int gfx_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v7_0_constants_init(adev);
@@ -4590,9 +4454,9 @@ static int gfx_v7_0_hw_init(void *handle)
return r;
}
-static int gfx_v7_0_hw_fini(void *handle)
+static int gfx_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4603,23 +4467,19 @@ static int gfx_v7_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v7_0_suspend(void *handle)
+static int gfx_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v7_0_hw_fini(adev);
+ return gfx_v7_0_hw_fini(ip_block);
}
-static int gfx_v7_0_resume(void *handle)
+static int gfx_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v7_0_hw_init(adev);
+ return gfx_v7_0_hw_init(ip_block);
}
-static bool gfx_v7_0_is_idle(void *handle)
+static bool gfx_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
return false;
@@ -4627,11 +4487,11 @@ static bool gfx_v7_0_is_idle(void *handle)
return true;
}
-static int gfx_v7_0_wait_for_idle(void *handle)
+static int gfx_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -4644,11 +4504,11 @@ static int gfx_v7_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v7_0_soft_reset(void *handle)
+static int gfx_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32(mmGRBM_STATUS);
@@ -4954,11 +4814,11 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v7_0_set_clockgating_state(void *handle,
+static int gfx_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -4977,11 +4837,11 @@ static int gfx_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v7_0_set_powergating_state(void *handle,
+static int gfx_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
@@ -5107,6 +4967,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .soft_recovery = gfx_v7_0_ring_soft_recovery,
.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
@@ -5180,18 +5041,18 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v7_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
if (counter < ao_cu_num)
ao_bitmap |= mask;
- counter ++;
+ counter++;
}
mask <<= 1;
}
@@ -5201,7 +5062,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
@@ -5213,8 +5074,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->lds_size = 64;
}
-const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v7_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,
.minor = 1,
@@ -5222,8 +5082,7 @@ const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
.funcs = &gfx_v7_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v7_2_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,
.minor = 2,
@@ -5231,8 +5090,7 @@ const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
.funcs = &gfx_v7_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v7_3_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,
.minor = 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 84d2eaa38101..1c87375e1dd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -835,37 +835,25 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
}
}
-static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@@ -874,8 +862,6 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -897,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -928,7 +914,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -938,20 +924,14 @@ err1:
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
if ((adev->asic_type != CHIP_STONEY) &&
(adev->asic_type != CHIP_TOPAZ))
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
kfree(adev->gfx.rlc.register_list_format);
}
@@ -959,7 +939,6 @@ static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -1002,62 +981,62 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
}
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_pfp_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
}
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_me_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
}
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_ce_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
}
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
@@ -1073,11 +1052,11 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
} else
adev->virt.chained_ib_support = false;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
@@ -1123,21 +1102,21 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_mec_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
}
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
@@ -1145,20 +1124,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if ((adev->asic_type != CHIP_STONEY) &&
(adev->asic_type != CHIP_TOPAZ)) {
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_mec2_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
}
if (!err) {
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)
adev->gfx.mec2_fw->data;
adev->gfx.mec2_fw_version =
@@ -1230,70 +1209,35 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- dev_err(adev->dev,
- "gfx8: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ dev_err(adev->dev, "gfx8: Failed to load firmware %s gfx firmware\n", chip_name);
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
}
return err;
}
-static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
- buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
- PACKET3_SET_CONTEXT_REG_START);
+ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
@@ -1330,7 +1274,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -1346,7 +1290,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
u32 *hpd;
size_t mec_hpd_size;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -1354,7 +1298,8 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
if (mec_hpd_size) {
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -1684,7 +1629,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
RREG32(sec_ded_counter_registers[i]);
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
@@ -1925,10 +1870,10 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type, hw_prio);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
if (r)
return r;
@@ -1938,12 +1883,12 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
-static int gfx_v8_0_sw_init(void *handle)
+static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id;
+ int xcc_id = 0;
struct amdgpu_ring *ring;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (adev->asic_type) {
case CHIP_TONGA:
@@ -2000,8 +1945,6 @@ static int gfx_v8_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- gfx_v8_0_scratch_init(adev);
-
r = gfx_v8_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@@ -2033,7 +1976,7 @@ static int gfx_v8_0_sw_init(void *handle)
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -2044,7 +1987,8 @@ static int gfx_v8_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v8_0_compute_ring_init(adev,
@@ -2058,19 +2002,18 @@ static int gfx_v8_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
+ r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
/* create MQD for all compute queues as well as KIQ for SRIOV case */
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
if (r)
return r;
@@ -2080,12 +2023,17 @@ static int gfx_v8_0_sw_init(void *handle)
if (r)
return r;
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return 0;
}
-static int gfx_v8_0_sw_fini(void *handle)
+static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
@@ -2093,9 +2041,9 @@ static int gfx_v8_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
gfx_v8_0_mec_fini(adev);
amdgpu_gfx_rlc_fini(adev);
@@ -3437,7 +3385,8 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
- u32 se_num, u32 sh_num, u32 instance)
+ u32 se_num, u32 sh_num, u32 instance,
+ int xcc_id)
{
u32 data;
@@ -3460,7 +3409,7 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
}
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
vi_srbm_select(adev, me, pipe, q, vm);
}
@@ -3621,13 +3570,13 @@ gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on VI */
- gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
}
/* GRBM_GFX_INDEX has a different offset on VI */
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
@@ -3643,13 +3592,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v8_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -3672,7 +3621,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -3683,7 +3632,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG_1);
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -3730,7 +3679,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ access. These should be enabled by FW for target VMIDs. */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
@@ -3830,7 +3779,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted
* to all the shaders
*/
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_FIFO_SIZE,
(adev->gfx.config.sc_prim_fifo_size_frontend <<
@@ -3861,7 +3810,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -3869,7 +3818,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
if (k == adev->usec_timeout) {
gfx_v8_0_select_se_sh(adev, 0xffffffff,
- 0xffffffff, 0xffffffff);
+ 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
i, j);
@@ -3877,7 +3826,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -4305,12 +4254,12 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
mdelay(1);
@@ -4324,7 +4273,6 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
amdgpu_ring_clear_ring(ring);
gfx_v8_0_cp_gfx_start(adev);
- ring->sched.ready = true;
return 0;
}
@@ -4335,7 +4283,7 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32(mmCP_MEC_CNTL, 0);
} else {
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -4350,19 +4298,17 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32(mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
+ WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
uint64_t queue_mask = 0;
int r, i;
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
continue;
/* This situation may be hit in the future if a new HW
@@ -4393,7 +4339,7 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
/* map queues */
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
@@ -4506,7 +4452,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
@@ -4517,13 +4463,13 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
@@ -4638,14 +4584,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct vi_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v8_0_kiq_setting(ring);
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
@@ -4659,6 +4604,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_mqd_init(ring);
@@ -4666,8 +4613,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
}
return 0;
@@ -4691,14 +4638,13 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
- amdgpu_ring_clear_ring(ring);
- } else {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
return 0;
@@ -4716,59 +4662,25 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (unlikely(r != 0))
- return r;
-
- gfx_v8_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v8_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (!r) {
- r = gfx_v8_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]);
if (r)
- goto done;
+ return r;
}
gfx_v8_0_set_mec_doorbell_range(adev);
- r = gfx_v8_0_kiq_kcq_enable(adev);
- if (r)
- goto done;
-
-done:
- return r;
+ return gfx_v8_0_kiq_kcq_enable(adev);
}
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
@@ -4782,7 +4694,7 @@ static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
if (r)
return r;
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
@@ -4829,10 +4741,10 @@ static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
gfx_v8_0_cp_compute_enable(adev, enable);
}
-static int gfx_v8_0_hw_init(void *handle)
+static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
@@ -4849,7 +4761,7 @@ static int gfx_v8_0_hw_init(void *handle)
static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
if (r)
@@ -4869,6 +4781,13 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
}
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ disable failed\n");
@@ -4876,9 +4795,9 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
return r;
}
-static bool gfx_v8_0_is_idle(void *handle)
+static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
|| RREG32(mmGRBM_STATUS2) != 0x8)
@@ -4911,13 +4830,13 @@ static int gfx_v8_0_wait_for_rlc_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v8_0_wait_for_idle(void *handle)
+static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v8_0_is_idle(handle))
+ if (gfx_v8_0_is_idle(ip_block))
return 0;
udelay(1);
@@ -4925,9 +4844,9 @@ static int gfx_v8_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v8_0_hw_fini(void *handle)
+static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4943,8 +4862,9 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
- amdgpu_gfx_rlc_enter_safe_mode(adev);
- if (!gfx_v8_0_wait_for_idle(adev))
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ if (!gfx_v8_0_wait_for_idle(ip_block))
gfx_v8_0_cp_enable(adev, false);
else
pr_err("cp is busy, skip halt cp\n");
@@ -4952,24 +4872,24 @@ static int gfx_v8_0_hw_fini(void *handle)
adev->gfx.rlc.funcs->stop(adev);
else
pr_err("rlc is busy, skip halt rlc\n");
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static int gfx_v8_0_suspend(void *handle)
+static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v8_0_hw_fini(handle);
+ return gfx_v8_0_hw_fini(ip_block);
}
-static int gfx_v8_0_resume(void *handle)
+static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v8_0_hw_init(handle);
+ return gfx_v8_0_hw_init(ip_block);
}
-static bool gfx_v8_0_check_soft_reset(void *handle)
+static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
@@ -5029,9 +4949,9 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
}
}
-static int gfx_v8_0_pre_soft_reset(void *handle)
+static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
@@ -5070,9 +4990,9 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
return 0;
}
-static int gfx_v8_0_soft_reset(void *handle)
+static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
@@ -5132,9 +5052,9 @@ static int gfx_v8_0_soft_reset(void *handle)
return 0;
}
-static int gfx_v8_0_post_soft_reset(void *handle)
+static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
@@ -5257,7 +5177,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -5279,9 +5199,10 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -5299,10 +5220,11 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
-static int gfx_v8_0_early_init(void *handle)
+static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
@@ -5315,9 +5237,9 @@ static int gfx_v8_0_early_init(void *handle)
return 0;
}
-static int gfx_v8_0_late_init(void *handle)
+static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -5357,7 +5279,7 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM))
/* Send msg to SMU via Powerplay */
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0);
WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
@@ -5403,10 +5325,10 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
}
}
-static int gfx_v8_0_set_powergating_state(void *handle,
+static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -5416,7 +5338,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
@@ -5470,13 +5392,13 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -5521,7 +5443,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
{
uint32_t data;
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
@@ -5575,7 +5497,7 @@ static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -5602,7 +5524,7 @@ static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -5619,10 +5541,12 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
+ amdgpu_gfx_off_ctrl(adev, false);
+
if (amdgpu_sriov_is_pp_one_vf(adev))
data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
else
@@ -5635,6 +5559,8 @@ static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
else
WREG32(mmRLC_SPM_VMID, data);
+
+ amdgpu_gfx_off_ctrl(adev, true);
}
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
@@ -5657,8 +5583,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t temp, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
@@ -5752,8 +5676,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5763,8 +5685,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
@@ -5826,7 +5746,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
- /* write cmd to Set CGCG Overrride */
+ /* write cmd to Set CGCG Override */
gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
@@ -5845,12 +5765,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
}
gfx_v8_0_wait_for_rlc_serdes(adev);
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
* === MGCG + MGLS + TS(CG/LS) ===
@@ -5864,6 +5784,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
}
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -6014,10 +5936,10 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v8_0_set_clockgating_state(void *handle,
+static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -6046,7 +5968,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs];
+ return *ring->rptr_cpu_addr;
}
static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -6055,7 +5977,7 @@ static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell)
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32(mmCP_RB0_WPTR);
}
@@ -6066,7 +5988,7 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
@@ -6189,6 +6111,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
/* Workaround for cache flush problems. First send a dummy EOP
* event down the pipe with seq one below.
@@ -6212,7 +6135,8 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
EOP_TC_ACTION_EN |
EOP_TC_WB_ACTION_EN |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+ EVENT_INDEX(5) |
+ (exec ? EOP_EXEC : 0)));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -6266,7 +6190,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
}
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@@ -6274,7 +6198,7 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
@@ -6363,33 +6287,22 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr & ring->buf_mask) - 1;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
-}
-
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
@@ -6718,7 +6631,8 @@ static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0;
}
-static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
+static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
+ bool from_wq)
{
u32 enc, se_id, sh_id, cu_id;
char type[20];
@@ -6756,13 +6670,13 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
* or from BH in which case we can access SQ_EDC_INFO
* instance
*/
- if (in_task()) {
+ if (from_wq) {
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
+ gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -6794,7 +6708,7 @@ static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
struct sq_work *sq_work = container_of(work, struct sq_work, work);
- gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
+ gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
}
static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
@@ -6809,7 +6723,7 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
* just print whatever info is possible directly from the ISR.
*/
if (work_pending(&adev->gfx.sq_work.work)) {
- gfx_v8_0_parse_sq_irq(adev, ih_data);
+ gfx_v8_0_parse_sq_irq(adev, ih_data, false);
} else {
adev->gfx.sq_work.ih_data = ih_data;
schedule_work(&adev->gfx.sq_work.work);
@@ -6968,7 +6882,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.emit_switch_buffer = gfx_v8_ring_emit_sb,
.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
.soft_recovery = gfx_v8_0_ring_soft_recovery,
.emit_mem_sync = gfx_v8_0_emit_mem_sync,
@@ -7004,6 +6917,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .soft_recovery = gfx_v8_0_ring_soft_recovery,
.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
.emit_wave_limit = gfx_v8_0_emit_wave_limit,
};
@@ -7030,13 +6944,14 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v8_0_ring_emit_rreg,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
@@ -7151,12 +7066,12 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v8_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
@@ -7172,7 +7087,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 65db88bb6cbc..0148d7ff34d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,14 +47,18 @@
#include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
+#include "gfx_v9_0_cleaner_shader.h"
+#include "gfx_v9_4_2.h"
#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"
#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS 2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -107,14 +111,12 @@ MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
-MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
-MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
@@ -124,6 +126,12 @@ MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
+
#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
@@ -137,6 +145,167 @@ MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
+#define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3),
+ /* packet headers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
+ /* compute queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP)
+};
+
enum ta_ras_gfx_subblock {
/*CPC*/
TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
@@ -731,59 +900,6 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
-static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
-{
- static void *scratch_reg0;
- static void *scratch_reg1;
- static void *scratch_reg2;
- static void *scratch_reg3;
- static void *spare_int;
- static uint32_t grbm_cntl;
- static uint32_t grbm_idx;
-
- scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
- scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
- scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
- scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
- spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
-
- grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
- grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
-
- if (amdgpu_sriov_runtime(adev)) {
- pr_err("shouldn't call rlcg write register during runtime\n");
- return;
- }
-
- if (offset == grbm_cntl || offset == grbm_idx) {
- if (offset == grbm_cntl)
- writel(v, scratch_reg2);
- else if (offset == grbm_idx)
- writel(v, scratch_reg3);
-
- writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
- } else {
- uint32_t i = 0;
- uint32_t retries = 50000;
-
- writel(v, scratch_reg0);
- writel(offset | 0x80000000, scratch_reg1);
- writel(1, spare_int);
- for (i = 0; i < retries; i++) {
- u32 tmp;
-
- tmp = readl(scratch_reg1);
- if (!(tmp & 0x80000000))
- break;
-
- udelay(10);
- }
- if (i >= retries)
- pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
- }
-
-}
-
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@ -796,17 +912,27 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
-static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if);
+ void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
+static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid);
+static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring,
PACKET3_SET_RESOURCES_VMID_MASK(0) |
@@ -816,8 +942,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring,
upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -825,9 +951,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring)
{
- struct amdgpu_device *adev = kiq_ring->adev;
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
@@ -870,9 +995,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
if (action == PREEMPT_QUEUES_NO_UNMAP) {
- amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
- amdgpu_ring_write(kiq_ring, seq);
+ amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -914,12 +1040,47 @@ static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
+
+static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+
+ /* enter save mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
+
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx_v9_0_kiq_set_resources,
.kiq_map_queues = gfx_v9_0_kiq_map_queues,
.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
.kiq_query_status = gfx_v9_0_kiq_query_status,
.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -929,13 +1090,13 @@ static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
- adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+ adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_9_0,
ARRAY_SIZE(golden_settings_gc_9_0));
@@ -943,7 +1104,7 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_0_vg10,
ARRAY_SIZE(golden_settings_gc_9_0_vg10));
break;
- case CHIP_VEGA12:
+ case IP_VERSION(9, 2, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_9_2_1,
ARRAY_SIZE(golden_settings_gc_9_2_1));
@@ -951,7 +1112,7 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_2_1_vg12,
ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
break;
- case CHIP_VEGA20:
+ case IP_VERSION(9, 4, 0):
soc15_program_register_sequence(adev,
golden_settings_gc_9_0,
ARRAY_SIZE(golden_settings_gc_9_0));
@@ -959,12 +1120,13 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_0_vg20,
ARRAY_SIZE(golden_settings_gc_9_0_vg20));
break;
- case CHIP_ARCTURUS:
+ case IP_VERSION(9, 4, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_9_4_1_arct,
ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
break;
- case CHIP_RAVEN:
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
soc15_program_register_sequence(adev, golden_settings_gc_9_1,
ARRAY_SIZE(golden_settings_gc_9_1));
if (adev->apu_flags & AMD_APU_IS_RAVEN2)
@@ -976,27 +1138,25 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_1_rv1,
ARRAY_SIZE(golden_settings_gc_9_1_rv1));
break;
- case CHIP_RENOIR:
+ case IP_VERSION(9, 3, 0):
soc15_program_register_sequence(adev,
golden_settings_gc_9_1_rn,
ARRAY_SIZE(golden_settings_gc_9_1_rn));
return; /* for renoir, don't need common goldensetting */
+ case IP_VERSION(9, 4, 2):
+ gfx_v9_4_2_init_golden_registers(adev,
+ adev->smuio.funcs->get_die_id(adev));
+ break;
default:
break;
}
- if (adev->asic_type != CHIP_ARCTURUS)
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
-static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
@@ -1034,22 +1194,18 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@@ -1062,9 +1218,6 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -1086,8 +1239,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -1117,7 +1270,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -1127,57 +1280,31 @@ err1:
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
kfree(adev->gfx.rlc.register_list_format);
}
-static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_1 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
- adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
- adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
- adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
- adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
- adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
- adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
- adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
- adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
- adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
- adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
- adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
- adev->gfx.rlc.reg_list_format_direct_reg_list_length =
- le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
-}
-
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
adev->gfx.me_fw_write_wait = false;
adev->gfx.mec_fw_write_wait = false;
- if ((adev->asic_type != CHIP_ARCTURUS) &&
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) &&
((adev->gfx.mec_fw_version < 0x000001a5) ||
- (adev->gfx.mec_feature_version < 46) ||
- (adev->gfx.pfp_fw_version < 0x000000b7) ||
- (adev->gfx.pfp_feature_version < 46)))
+ (adev->gfx.mec_feature_version < 46) ||
+ (adev->gfx.pfp_fw_version < 0x000000b7) ||
+ (adev->gfx.pfp_feature_version < 46)))
DRM_WARN_ONCE("CP firmware version too old, please update!");
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 42) &&
(adev->gfx.pfp_fw_version >= 0x000000b1) &&
@@ -1188,7 +1315,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
(adev->gfx.mec_feature_version >= 42))
adev->gfx.mec_fw_write_wait = true;
break;
- case CHIP_VEGA12:
+ case IP_VERSION(9, 2, 1):
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 44) &&
(adev->gfx.pfp_fw_version >= 0x000000b2) &&
@@ -1199,7 +1326,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
(adev->gfx.mec_feature_version >= 44))
adev->gfx.mec_fw_write_wait = true;
break;
- case CHIP_VEGA20:
+ case IP_VERSION(9, 4, 0):
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 44) &&
(adev->gfx.pfp_fw_version >= 0x000000b2) &&
@@ -1210,7 +1337,8 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
(adev->gfx.mec_feature_version >= 44))
adev->gfx.mec_fw_write_wait = true;
break;
- case CHIP_RAVEN:
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 42) &&
(adev->gfx.pfp_fw_version >= 0x000000b1) &&
@@ -1243,6 +1371,12 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
+ /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
+ { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
+ /* https://bbs.openkylin.top/t/topic/171497 */
+ { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
+ /* HP 705G4 DM with R5 2400G */
+ { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
{ 0, 0, 0, 0, 0 },
};
@@ -1271,17 +1405,28 @@ static bool is_raven_kicker(struct amdgpu_device *adev)
return false;
}
+static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
+{
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
+ (adev->gfx.me_fw_version >= 0x000000a5) &&
+ (adev->gfx.me_feature_version >= 52))
+ return true;
+ else
+ return false;
+}
+
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
break;
- case CHIP_RAVEN:
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
(adev->apu_flags & AMD_APU_IS_PICASSO)) &&
((!is_raven_kicker(adev) &&
@@ -1295,7 +1440,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_RLC_SMU_HS;
break;
- case CHIP_RENOIR:
+ case IP_VERSION(9, 3, 0):
if (adev->pm.pp_feature & PP_GFXOFF_MASK)
adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_CP |
@@ -1307,95 +1452,45 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
}
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
{
- char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
out:
if (err) {
- dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
}
return err;
}
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
{
- char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
uint32_t smu_version;
@@ -1411,253 +1506,114 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
if (!strcmp(chip_name, "picasso") &&
(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_am4.bin", chip_name);
else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
(smu_version >= 0x41e2b))
/**
*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
*/
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_kicker_rlc.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
- rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
- if (version_major == 2 && version_minor == 1)
- adev->gfx.rlc.is_rlc_v2_1 = true;
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
- le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
- le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
- le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
- le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
- goto out;
- }
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
-
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
-
- if (adev->gfx.rlc.is_rlc_v2_1)
- gfx_v9_0_init_rlc_ext_microcode(adev);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- if (adev->gfx.rlc.is_rlc_v2_1 &&
- adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
- adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
- adev->gfx.rlc.save_restore_list_srm_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
- }
- }
-
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
out:
- if (err) {
- dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- }
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+
return err;
}
+static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
+{
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
+ return false;
+
+ return true;
+}
+
static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
{
- char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
- if (!err) {
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.mec2_fw->data;
- adev->gfx.mec2_fw_version =
- le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec2_feature_version =
- le32_to_cpu(cp_hdr->ucode_feature_version);
- } else {
- err = 0;
- adev->gfx.mec2_fw = NULL;
- }
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- if (adev->gfx.mec2_fw) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
- info->fw = adev->gfx.mec2_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- /* TODO: Determine if MEC2 JT FW loading can be removed
- for all GFX V9 asic and above */
- if (adev->asic_type != CHIP_ARCTURUS &&
- adev->asic_type != CHIP_RENOIR) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
- info->fw = adev->gfx.mec2_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- }
+ if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
+ if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec2.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
+ if (!err) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
+ } else {
+ err = 0;
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
}
+ } else {
+ adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
+ adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
}
-out:
gfx_v9_0_check_if_need_gfxoff(adev);
gfx_v9_0_check_fw_write_wait(adev);
- if (err) {
- dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
- }
+
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
return err;
}
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
+ char ucode_prefix[30];
int r;
DRM_DEBUG("\n");
-
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- chip_name = "vega10";
- break;
- case CHIP_VEGA12:
- chip_name = "vega12";
- break;
- case CHIP_VEGA20:
- chip_name = "vega20";
- break;
- case CHIP_RAVEN:
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- chip_name = "raven2";
- else if (adev->apu_flags & AMD_APU_IS_PICASSO)
- chip_name = "picasso";
- else
- chip_name = "raven";
- break;
- case CHIP_ARCTURUS:
- chip_name = "arcturus";
- break;
- case CHIP_RENOIR:
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- chip_name = "renoir";
- else
- chip_name = "green_sardine";
- break;
- default:
- BUG();
- }
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
/* No CPG in Arcturus */
if (adev->gfx.num_gfx_rings) {
- r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
+ r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
if (r)
return r;
}
- r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
+ r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
if (r)
return r;
- r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
+ r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
if (r)
return r;
@@ -1692,45 +1648,18 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
@@ -1743,7 +1672,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
always_on_cu_num = 4;
- else if (adev->asic_type == CHIP_VEGA12)
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
always_on_cu_num = 8;
else
always_on_cu_num = 12;
@@ -1754,10 +1683,10 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
mask = 1;
cu_bitmap = 0;
counter = 0;
- gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
- if (cu_info->bitmap[i][j] & mask) {
+ if (cu_info->bitmap[0][i][j] & mask) {
if (counter == pg_always_on_cu_num)
WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
if (counter < always_on_cu_num)
@@ -1773,7 +1702,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1795,7 +1724,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
/* set mmRLC_LB_PARAMS = 0x003F_1006 */
@@ -1844,7 +1773,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
/* set mmRLC_LB_PARAMS = 0x003F_1006 */
@@ -1882,7 +1811,25 @@ static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
- return 5;
+ if (gfx_v9_0_load_mec2_fw_bin_support(adev))
+ return 5;
+ else
+ return 4;
+}
+
+static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
}
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
@@ -1909,21 +1856,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return r;
}
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- gfx_v9_0_init_lbpw(adev);
- break;
- case CHIP_VEGA20:
- gfx_v9_4_init_lbpw(adev);
- break;
- default:
- break;
- }
-
- /* init spm vmid with 0xf */
- if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
-
return 0;
}
@@ -1944,14 +1876,15 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
const struct gfx_firmware_header_v1_0 *mec_hdr;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
if (mec_hpd_size) {
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -2018,7 +1951,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
-static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 1 wave data */
dst[(*no_fields)++] = 1;
@@ -2036,9 +1969,10 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -2047,7 +1981,7 @@ static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
-static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
@@ -2058,34 +1992,30 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
- soc15_grbm_select(adev, me, pipe, q, vm);
+ soc15_grbm_select(adev, me, pipe, q, vm, 0);
}
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
- .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
- .select_se_sh = &gfx_v9_0_select_se_sh,
- .read_wave_data = &gfx_v9_0_read_wave_data,
- .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
- .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
- .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
- .ras_error_inject = &gfx_v9_0_ras_error_inject,
- .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
- .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
+ .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_0_select_se_sh,
+ .read_wave_data = &gfx_v9_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+};
+
+const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
+ .ras_error_inject = &gfx_v9_0_ras_error_inject,
+ .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
};
-static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
- .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
- .select_se_sh = &gfx_v9_0_select_se_sh,
- .read_wave_data = &gfx_v9_0_read_wave_data,
- .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
- .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
- .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
- .ras_error_inject = &gfx_v9_4_ras_error_inject,
- .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
- .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
- .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
+static struct amdgpu_gfx_ras gfx_v9_0_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_0_ras_ops,
+ },
};
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
@@ -2093,10 +2023,8 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
u32 gb_addr_config;
int err;
- adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
-
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2104,7 +2032,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
break;
- case CHIP_VEGA12:
+ case IP_VERSION(9, 2, 1):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2113,7 +2041,8 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
DRM_INFO("fix gfx.config for vega12\n");
break;
- case CHIP_VEGA20:
+ case IP_VERSION(9, 4, 0):
+ adev->gfx.ras = &gfx_v9_0_ras;
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2127,7 +2056,8 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
if (err)
return err;
break;
- case CHIP_RAVEN:
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2138,8 +2068,8 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
else
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
break;
- case CHIP_ARCTURUS:
- adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
+ case IP_VERSION(9, 4, 1):
+ adev->gfx.ras = &gfx_v9_4_ras;
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2149,7 +2079,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config &= ~0xf3e777ff;
gb_addr_config |= 0x22014042;
break;
- case CHIP_RENOIR:
+ case IP_VERSION(9, 3, 0):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2159,6 +2089,21 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config &= ~0xf3e777ff;
gb_addr_config |= 0x22010042;
break;
+ case IP_VERSION(9, 4, 2):
+ adev->gfx.ras = &gfx_v9_4_2_ras;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22014042;
+ /* check vbios table if gpu info is not available */
+ err = amdgpu_atomfirmware_get_gfx_info(adev);
+ if (err)
+ return err;
+ break;
default:
BUG();
break;
@@ -2223,32 +2168,64 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX9_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
- return amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type, hw_prio);
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
}
-static int gfx_v9_0_sw_init(void *handle)
+static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id;
+ int xcc_id = 0;
struct amdgpu_ring *ring;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned int hw_prio;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 2):
adev->gfx.mec.num_mec = 2;
break;
default:
@@ -2256,6 +2233,43 @@ static int gfx_v9_0_sw_init(void *handle)
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 3, 0):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 167 &&
+ adev->gfx.pfp_fw_version >= 196 &&
+ adev->gfx.mec_fw_version >= 474) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 88) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -2264,6 +2278,13 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -2290,18 +2311,14 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- gfx_v9_0_scratch_init(adev);
-
- r = gfx_v9_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load gfx firmware!\n");
- return r;
- }
-
- r = adev->gfx.rlc.funcs->init(adev);
- if (r) {
- DRM_ERROR("Failed to init rlc BOs!\n");
- return r;
+ if (adev->gfx.rlc.funcs) {
+ if (adev->gfx.rlc.funcs->init) {
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+ }
}
r = gfx_v9_0_mec_init(adev);
@@ -2320,20 +2337,60 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq,
+
+ /* disable scheduler on the real ring */
+ ring->no_scheduler = adev->gfx.mcbp;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
+ /* set up the software rings */
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+ ring = &adev->gfx.sw_gfx_ring[i];
+ ring->ring_obj = NULL;
+ sprintf(ring->name, amdgpu_sw_ring_name(i));
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ ring->is_sw_ring = true;
+ hw_prio = amdgpu_sw_ring_priority(i);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
+ NULL);
+ if (r)
+ return r;
+ ring->wptr = 0;
+ }
+
+ /* init the muxer and add software rings */
+ r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
+ GFX9_NUM_SW_GFX_RINGS);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
+ return r;
+ }
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+ r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
+ &adev->gfx.sw_gfx_ring[i]);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
+ return r;
+ }
+ }
+ }
+
/* set up the compute queues - allocate horizontally across pipes */
ring_id = 0;
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v9_0_compute_ring_init(adev,
@@ -2347,19 +2404,26 @@ static int gfx_v9_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
/* create MQD for all compute queues as wel as KIQ for SRIOV case */
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
if (r)
return r;
@@ -2369,28 +2433,47 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
+ if (amdgpu_gfx_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
+ return -EINVAL;
+ }
+
+ gfx_v9_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
-static int gfx_v9_0_sw_fini(void *handle)
+static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_gfx_ras_fini(adev);
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
+ amdgpu_ring_mux_fini(&adev->gfx.muxer);
+ }
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
gfx_v9_0_mec_fini(adev);
- amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
if (adev->flags & AMD_IS_APU) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
@@ -2398,6 +2481,11 @@ static int gfx_v9_0_sw_fini(void *handle)
}
gfx_v9_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+
return 0;
}
@@ -2408,7 +2496,7 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
- u32 instance)
+ u32 instance, int xcc_id)
{
u32 data;
@@ -2457,19 +2545,42 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v9_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
}
+static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
#define DEFAULT_SH_MEM_BASES (0x6000)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
@@ -2491,16 +2602,16 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_lock(&adev->srbm_mutex);
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
- soc15_grbm_select(adev, 0, 0, 0, i);
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
/* CP and shaders */
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
}
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ access. These should be enabled by FW for target VMIDs. */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
@@ -2531,13 +2642,16 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
{
uint32_t tmp;
- switch (adev->asic_type) {
- case CHIP_ARCTURUS:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 1):
tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
- tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
- DISABLE_BARRIER_WAITCNT, 1);
+ tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !READ_ONCE(adev->barrier_has_auto_waitcnt));
WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
break;
+ case IP_VERSION(9, 4, 2):
+ gfx_v9_4_2_init_sq(adev);
+ break;
default:
break;
}
@@ -2548,19 +2662,23 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ if (!amdgpu_sriov_vf(adev) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) {
+ WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ }
gfx_v9_0_tiling_mode_table_init(adev);
- gfx_v9_0_setup_rb(adev);
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_setup_rb(adev);
gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
- soc15_grbm_select(adev, 0, 0, 0, i);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
/* CP and shaders */
if (i == 0) {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
@@ -2582,7 +2700,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
}
}
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -2599,15 +2717,15 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
udelay(1);
}
if (k == adev->usec_timeout) {
- gfx_v9_0_select_se_sh(adev, 0xffffffff,
- 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
i, j);
@@ -2615,7 +2733,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -2634,17 +2752,15 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
{
u32 tmp;
- /* don't toggle interrupts that are only applicable
- * to me0 pipe0 on AISCs that have me0 removed */
- if (!adev->gfx.num_gfx_rings)
- return;
+ /* These interrupts should be enabled to drive DS clock */
tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
+ if (adev->gfx.num_gfx_rings)
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}
@@ -2869,7 +2985,7 @@ static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
- if (adev->asic_type != CHIP_RENOIR)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
pwr_10_0_gfxip_control_over_cgpg(adev, true);
}
}
@@ -2981,7 +3097,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
* And it's needed by gfxoff feature.
*/
if (adev->gfx.rlc.is_rlc_v2_1) {
- if (adev->asic_type == CHIP_VEGA12 ||
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 2, 1) ||
(adev->apu_flags & AMD_APU_IS_RAVEN2))
gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
@@ -2993,8 +3110,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GDS |
AMD_PG_SUPPORT_RLC_SMU_HS)) {
- WREG32(mmRLC_JUMP_TABLE_RESTORE,
- adev->gfx.rlc.cp_table_gpu_addr >> 8);
+ WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
+ adev->gfx.rlc.cp_table_gpu_addr >> 8);
gfx_v9_0_init_gfx_power_gating(adev);
}
}
@@ -3094,14 +3211,17 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
- switch (adev->asic_type) {
- case CHIP_RAVEN:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ gfx_v9_0_init_lbpw(adev);
if (amdgpu_lbpw == 0)
gfx_v9_0_enable_lbpw(adev, false);
else
gfx_v9_0_enable_lbpw(adev, true);
break;
- case CHIP_VEGA20:
+ case IP_VERSION(9, 4, 0):
+ gfx_v9_4_init_lbpw(adev);
if (amdgpu_lbpw > 0)
gfx_v9_0_enable_lbpw(adev, true);
else
@@ -3111,6 +3231,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
break;
}
+ gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
+
adev->gfx.rlc.funcs->start(adev);
return 0;
@@ -3120,6 +3242,15 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
@@ -3197,6 +3328,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v9_0_cp_gfx_enable(adev, true);
+ /* Now only limit the quirk on the APU gfx9 series and already
+ * confirmed that the APU gfx10/gfx11 needn't such update.
+ */
+ if (adev->flags & AMD_IS_APU &&
+ adev->in_s3 && !pm_resume_via_firmware()) {
+ DRM_INFO("Will skip the CSB packet resubmit\n");
+ return 0;
+ }
r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
@@ -3274,12 +3413,12 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
@@ -3311,7 +3450,6 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v9_0_cp_gfx_start(adev);
- ring->sched.ready = true;
return 0;
}
@@ -3322,8 +3460,16 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
} else {
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
- (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- adev->gfx.kiq.ring.sched.ready = false;
+ (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -3380,9 +3526,7 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
@@ -3480,7 +3624,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
@@ -3491,33 +3635,16 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- tmp = 0;
- /* enable the doorbell if requested */
- if (ring->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
-
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
@@ -3536,7 +3663,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
/* set static priority for a queue/ring */
gfx_v9_0_mqd_set_priority(ring, mqd);
- mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+ mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
/* map_queues packet doesn't need activate the queue,
* so only kiq need set this field.
@@ -3623,7 +3750,16 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
(adev->doorbell_index.kiq * 2) << 2);
- WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+ /* If GC has entered CGPG, ringing doorbell > first page
+ * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
+ * workaround this issue. And this change has to align with firmware
+ * update.
+ */
+ if (check_if_enlarge_doorbell_range(adev))
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell.size - 4));
+ else
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
(adev->doorbell_index.userqueue_end * 2) << 2);
}
@@ -3695,70 +3831,82 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
+ struct v9_mqd *tmp_mqd;
gfx_v9_0_kiq_setting(ring);
- if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
+ /* GPU could be in bad state during probe, driver trigger the reset
+ * after load the SMU, in this case , the mqd is not be initialized.
+ * driver need to re-init the mqd.
+ * check mqd->cp_hqd_pq_control since this value should not be 0
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
+ if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
+ /* for GPU_RESET case , reset MQD to a clean status */
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_kiq_init_register(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
} else {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_mqd_init(ring);
gfx_v9_0_kiq_init_register(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
}
return 0;
}
-static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
+ struct v9_mqd *tmp_mqd;
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
+ * is not be initialized before
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
+
+ if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_mqd_init(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
-
/* reset ring buffer */
ring->wptr = 0;
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
- amdgpu_ring_clear_ring(ring);
- } else {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
@@ -3767,54 +3915,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0))
- return r;
-
- gfx_v9_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v9_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v9_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v9_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
@@ -3838,6 +3955,10 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r;
}
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_cp_gfx_enable(adev, false);
+ gfx_v9_0_cp_compute_enable(adev, false);
+
r = gfx_v9_0_kiq_resume(adev);
if (r)
return r;
@@ -3873,7 +3994,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
{
u32 tmp;
- if (adev->asic_type != CHIP_ARCTURUS)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
return;
tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
@@ -3893,10 +4015,13 @@ static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
gfx_v9_0_cp_compute_enable(adev, enable);
}
-static int gfx_v9_0_hw_init(void *handle)
+static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
if (!amdgpu_sriov_vf(adev))
gfx_v9_0_init_golden_registers(adev);
@@ -3913,21 +4038,27 @@ static int gfx_v9_0_hw_init(void *handle)
if (r)
return r;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) &&
+ !amdgpu_sriov_vf(adev))
+ gfx_v9_4_2_set_power_brake_sequence(adev);
+
return r;
}
-static int gfx_v9_0_hw_fini(void *handle)
+static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
/* DF freeze and kcq disable will fail */
if (!amdgpu_ras_intr_triggered())
/* disable KCQ to avoid CPC touch memory not valid anymore */
- amdgpu_gfx_disable_kcq(adev);
+ amdgpu_gfx_disable_kcq(adev, 0);
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);
@@ -3945,33 +4076,40 @@ static int gfx_v9_0_hw_fini(void *handle)
*/
if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
- adev->gfx.kiq.ring.pipe,
- adev->gfx.kiq.ring.queue, 0);
- gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
+ adev->gfx.kiq[0].ring.pipe,
+ adev->gfx.kiq[0].ring.queue, 0, 0);
+ gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
gfx_v9_0_cp_enable(adev, false);
- adev->gfx.rlc.funcs->stop(adev);
+ /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
+ if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
+ dev_dbg(adev->dev, "Skipping RLC halt\n");
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
return 0;
}
-static int gfx_v9_0_suspend(void *handle)
+static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_0_hw_fini(handle);
+ return gfx_v9_0_hw_fini(ip_block);
}
-static int gfx_v9_0_resume(void *handle)
+static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_0_hw_init(handle);
+ return gfx_v9_0_hw_init(ip_block);
}
-static bool gfx_v9_0_is_idle(void *handle)
+static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -3980,24 +4118,24 @@ static bool gfx_v9_0_is_idle(void *handle)
return true;
}
-static int gfx_v9_0_wait_for_idle(void *handle)
+static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v9_0_is_idle(handle))
+ if (gfx_v9_0_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v9_0_soft_reset(void *handle)
+static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -4036,19 +4174,17 @@ static int gfx_v9_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v9_0_cp_compute_enable(adev, false);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@@ -4062,7 +4198,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
unsigned long flags;
uint32_t seq, reg_val_offs = 0;
uint64_t value = 0;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_rreg);
@@ -4101,7 +4237,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
*
* also don't wait anymore for IRQ context
* */
- if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ if (r < 1 && (amdgpu_in_reset(adev)))
goto failed_kiq_read;
might_sleep();
@@ -4132,19 +4268,40 @@ failed_kiq_read:
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
- uint64_t clock;
+ uint64_t clock, clock_lo, clock_hi, hi_check;
- amdgpu_gfx_off_ctrl(adev, false);
- mutex_lock(&adev->gfx.gpu_clock_mutex);
- if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
- clock = gfx_v9_0_kiq_read_clock(adev);
- } else {
- WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
- clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
- ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 3, 0):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ default:
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 0, 1) &&
+ amdgpu_sriov_runtime(adev)) {
+ clock = gfx_v9_0_kiq_read_clock(adev);
+ } else {
+ WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ }
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+ break;
}
- mutex_unlock(&adev->gfx.gpu_clock_mutex);
- amdgpu_gfx_off_ctrl(adev, true);
return clock;
}
@@ -4486,7 +4643,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
if (!ring->sched.ready)
return 0;
- if (adev->asic_type == CHIP_ARCTURUS) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@@ -4626,20 +4783,24 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
}
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
-static int gfx_v9_0_early_init(void *handle)
+static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
- if (adev->asic_type == CHIP_ARCTURUS)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
adev->gfx.num_gfx_rings = 0;
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
gfx_v9_0_set_kiq_pm4_funcs(adev);
@@ -4648,12 +4809,15 @@ static int gfx_v9_0_early_init(void *handle)
gfx_v9_0_set_gds_init(adev);
gfx_v9_0_set_rlc_funcs(adev);
- return 0;
+ /* init rlcg reg access ctrl */
+ gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v9_0_init_microcode(adev);
}
-static int gfx_v9_0_ecc_late_init(void *handle)
+static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/*
@@ -4662,31 +4826,32 @@ static int gfx_v9_0_ecc_late_init(void *handle)
* to GDS in suspend/resume sequence on several cards. So just
* limit this operation in cold boot sequence.
*/
- if (!adev->in_suspend) {
+ if ((!adev->in_suspend) &&
+ (adev->gds.gds_size)) {
r = gfx_v9_0_do_edc_gds_workarounds(adev);
if (r)
return r;
}
/* requires IBs so do in late init after IB pool is initialized */
- r = gfx_v9_0_do_edc_gpr_workarounds(adev);
- if (r)
- return r;
-
- if (adev->gfx.funcs &&
- adev->gfx.funcs->reset_ras_error_count)
- adev->gfx.funcs->reset_ras_error_count(adev);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
+ else
+ r = gfx_v9_0_do_edc_gpr_workarounds(adev);
- r = amdgpu_gfx_ras_late_init(adev);
if (r)
return r;
+ if (adev->gfx.ras &&
+ adev->gfx.ras->enable_watchdog_timer)
+ adev->gfx.ras->enable_watchdog_timer(adev);
+
return 0;
}
-static int gfx_v9_0_late_init(void *handle)
+static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4697,10 +4862,21 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
- r = gfx_v9_0_ecc_late_init(handle);
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
if (r)
return r;
+ r = gfx_v9_0_ecc_late_init(ip_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ gfx_v9_4_2_debug_trap_config_init(adev,
+ adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+ else
+ gfx_v9_0_debug_trap_config_init(adev,
+ adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+
return 0;
}
@@ -4716,7 +4892,7 @@ static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -4733,7 +4909,7 @@ static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
@@ -4744,7 +4920,7 @@ static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -4756,7 +4932,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -4783,14 +4959,12 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t data, def;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- if (adev->asic_type != CHIP_VEGA12)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
@@ -4824,7 +4998,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* 1 - MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- if (adev->asic_type != CHIP_VEGA12)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
@@ -4849,8 +5023,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
}
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
@@ -4858,13 +5030,11 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
{
uint32_t data, def;
- if (adev->asic_type == CHIP_ARCTURUS)
+ if (!adev->gfx.num_gfx_rings)
return;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
/* Enable 3D CGCG/CGLS */
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ if (enable) {
/* write cmd to clear cgcg/cgls ov */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset CGCG override */
@@ -4876,8 +5046,12 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
/* enable 3Dcgcg FSM(0x0000363f) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
- data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
- RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ else
+ data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
+
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
@@ -4900,8 +5074,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -4909,8 +5081,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t def, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset CGCG override */
@@ -4926,7 +5096,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
/* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
- if (adev->asic_type == CHIP_ARCTURUS)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
else
@@ -4952,13 +5122,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS
* === MGCG + MGLS ===
@@ -4978,10 +5147,12 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === MGCG + MGLS === */
gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
}
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid)
{
u32 reg, data;
@@ -4989,7 +5160,7 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
if (amdgpu_sriov_is_pp_one_vf(adev))
data = RREG32_NO_KIQ(reg);
else
- data = RREG32(reg);
+ data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
@@ -5000,6 +5171,15 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
}
+static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
+{
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v9_0_update_spm_vmid_internal(adev, vmid);
+
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
uint32_t offset,
struct soc15_reg_rlcg *entries, int arr_size)
@@ -5042,21 +5222,21 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.reset = gfx_v9_0_rlc_reset,
.start = gfx_v9_0_rlc_start,
.update_spm_vmid = gfx_v9_0_update_spm_vmid,
- .rlcg_wreg = gfx_v9_0_rlcg_wreg,
.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
};
-static int gfx_v9_0_set_powergating_state(void *handle,
+static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 3, 0):
if (!enable)
- amdgpu_gfx_off_ctrl(adev, false);
+ amdgpu_gfx_off_ctrl_immediate(adev, false);
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
@@ -5078,10 +5258,10 @@ static int gfx_v9_0_set_powergating_state(void *handle,
gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
if (enable)
- amdgpu_gfx_off_ctrl(adev, true);
+ amdgpu_gfx_off_ctrl_immediate(adev, true);
break;
- case CHIP_VEGA12:
- amdgpu_gfx_off_ctrl(adev, enable);
+ case IP_VERSION(9, 2, 1):
+ amdgpu_gfx_off_ctrl_immediate(adev, enable);
break;
default:
break;
@@ -5090,21 +5270,23 @@ static int gfx_v9_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v9_0_set_clockgating_state(void *handle,
+static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 2):
gfx_v9_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -5114,9 +5296,9 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -5146,7 +5328,7 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
- if (adev->asic_type != CHIP_ARCTURUS) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
/* AMD_CG_SUPPORT_GFX_3D_CGCG */
data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
@@ -5160,7 +5342,7 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
+ return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
}
static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -5170,7 +5352,7 @@ static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
} else {
wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
@@ -5185,7 +5367,7 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
@@ -5237,11 +5419,18 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
- gfx_v9_0_ring_emit_de_meta(ring);
+ gfx_v9_0_ring_emit_de_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) &&
+ flags & AMDGPU_IB_PREEMPTED) ?
+ true : false,
+ job->gds_size > 0 && job->gds_base != 0);
}
amdgpu_ring_write(ring, header);
@@ -5252,9 +5441,70 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
#endif
lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_ib_on_emit_cntl(ring);
amdgpu_ring_write(ring, control);
}
+static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ u32 control = ring->ring[offset];
+
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+ ring->ring[offset] = control;
+}
+
+static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ void *ce_payload_cpu_addr;
+ uint64_t payload_offset, payload_size;
+
+ payload_size = sizeof(struct v9_ce_ib_state);
+
+ payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+
+ if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+ memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
+ } else {
+ memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
+ (ring->buf_mask + 1 - offset) << 2);
+ payload_size -= (ring->buf_mask + 1 - offset) << 2;
+ memcpy((void *)&ring->ring[0],
+ ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
+ payload_size);
+ }
+}
+
+static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ void *de_payload_cpu_addr;
+ uint64_t payload_offset, payload_size;
+
+ payload_size = sizeof(struct v9_de_ib_state);
+
+ payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+
+ ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
+ IB_COMPLETION_STATUS_PREEMPTED;
+
+ if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+ memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
+ } else {
+ memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
+ (ring->buf_mask + 1 - offset) << 2);
+ payload_size -= (ring->buf_mask + 1 - offset) << 2;
+ memcpy((void *)&ring->ring[0],
+ de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
+ payload_size);
+ }
+}
+
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
@@ -5296,17 +5546,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+ uint32_t dw2 = 0;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
- amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
- EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
- EOP_TC_ACTION_EN |
- EOP_TC_WB_ACTION_EN |
- EOP_TC_MD_ACTION_EN)) |
- EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+
+ if (writeback) {
+ dw2 = EOP_TC_NC_ACTION_EN;
+ } else {
+ dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+ EOP_TC_MD_ACTION_EN;
+ }
+ dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5);
+ if (exec)
+ dw2 |= EOP_EXEC;
+
+ amdgpu_ring_write(ring, dw2);
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
/*
@@ -5350,7 +5607,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
+ return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
}
static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
@@ -5359,7 +5616,7 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell)
- wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
else
BUG();
return wptr;
@@ -5371,7 +5628,7 @@ static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else{
BUG(); /* only DOORBELL method supported on gfx9 now */
@@ -5411,35 +5668,116 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0);
}
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
+ struct amdgpu_device *adev = ring->adev;
struct v9_ce_ib_state ce_payload = {0};
- uint64_t csa_addr;
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+ offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
- amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+ amdgpu_ring_ib_on_emit_ce(ring);
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+ sizeof(ce_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+ sizeof(ce_payload) >> 2);
+}
+
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 13);
+ gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ring->trail_seq);
+
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*ring->trail_fence_cpu_addr))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
+ }
+
+ /*reset the CP_VMID_PREEMPT after trailing fence*/
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+ 0x0);
+ amdgpu_ring_commit(ring);
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
}
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
{
+ struct amdgpu_device *adev = ring->adev;
struct v9_de_ib_state de_payload = {0};
- uint64_t csa_addr, gds_addr;
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
int cnt;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
- gds_addr = csa_addr + 4096;
- de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
- de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+ offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+
+ if (usegds) {
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+ }
cnt = (sizeof(de_payload) >> 2) + 4 - 2;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
@@ -5447,9 +5785,16 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
- amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ amdgpu_ring_ib_on_emit_de(ring);
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
}
static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@@ -5465,8 +5810,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
uint32_t dw2 = 0;
- if (amdgpu_sriov_vf(ring->adev))
- gfx_v9_0_ring_emit_ce_meta(ring);
+ gfx_v9_0_ring_emit_ce_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) &&
+ flags & AMDGPU_IB_PREEMPTED) ? true : false);
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -5493,31 +5839,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr & ring->buf_mask) - 1;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
-}
-
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
@@ -5590,7 +5926,9 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
@@ -5645,33 +5983,111 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
TIME_STAMP_INT_ENABLE, 0);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
TIME_STAMP_INT_ENABLE, 1);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
break;
default:
break;
}
}
+static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
PRIV_REG_INT_ENABLE,
state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -5792,7 +6208,15 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
switch (me_id) {
case 0:
- amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ if (adev->gfx.num_gfx_rings) {
+ if (!adev->gfx.mcbp) {
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
+ /* Fence signals are handled on the software rings*/
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+ }
+ }
break;
case 1:
case 2:
@@ -5845,6 +6269,15 @@ static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream\n");
+ gfx_v9_0_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -6297,7 +6730,7 @@ static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
};
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if)
+ void *inject_if, uint32_t instance_mask)
{
struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
int ret;
@@ -6336,13 +6769,13 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
block_info.value = info->value;
mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
mutex_unlock(&adev->grbm_idx_mutex);
return ret;
}
-static const char *vml2_mems[] = {
+static const char * const vml2_mems[] = {
"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
"UTC_VML2_BANK_CACHE_0_4K_MEM0",
@@ -6361,7 +6794,7 @@ static const char *vml2_mems[] = {
"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
-static const char *vml2_walker_mems[] = {
+static const char * const vml2_walker_mems[] = {
"UTC_VML2_CACHE_PDE0_MEM0",
"UTC_VML2_CACHE_PDE0_MEM1",
"UTC_VML2_CACHE_PDE1_MEM0",
@@ -6371,7 +6804,7 @@ static const char *vml2_walker_mems[] = {
"UTC_VML2_RDIF_LOG_FIFO",
};
-static const char *atc_l2_cache_2m_mems[] = {
+static const char * const atc_l2_cache_2m_mems[] = {
"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
@@ -6564,7 +6997,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
- gfx_v9_0_select_se_sh(adev, j, 0x0, k);
+ amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
}
}
@@ -6607,7 +7040,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}
-static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -6616,7 +7049,7 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
uint32_t reg_value;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
+ return;
err_data->ue_count = 0;
err_data->ce_count = 0;
@@ -6626,7 +7059,7 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
- gfx_v9_0_select_se_sh(adev, j, 0, k);
+ amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
reg_value =
RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
if (reg_value)
@@ -6641,12 +7074,10 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
err_data->ce_count += sec_count;
err_data->ue_count += ded_count;
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
gfx_v9_0_query_utc_edc_status(adev, err_data);
-
- return 0;
}
static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
@@ -6727,6 +7158,230 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
}
}
+static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ int i, r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ return r;
+
+ /* make sure dequeue is complete*/
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ if (r) {
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ return r;
+ }
+
+ r = gfx_v9_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r) {
+ DRM_ERROR("fail to remap queue\n");
+ return r;
+ }
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_9[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_9[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+}
+
+static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+
+ if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_9[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+}
+
+static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Emit the cleaner shader */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ else
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0));
+
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ip_block *gfx_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+
+ /* Raven and PCO APUs seem to have stability issues
+ * with compute and gfxoff and gfx pg. Disable gfx pg during
+ * submission and allow again afterwards.
+ */
+ if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+ gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE);
+}
+
+static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ip_block *gfx_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+
+ /* Raven and PCO APUs seem to have stability issues
+ * with compute and gfxoff and gfx pg. Disable gfx pg during
+ * submission and allow again afterwards.
+ */
+ if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+ gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@@ -6743,6 +7398,8 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.set_clockgating_state = gfx_v9_0_set_clockgating_state,
.set_powergating_state = gfx_v9_0_set_powergating_state,
.get_clockgating_state = gfx_v9_0_get_clockgating_state,
+ .dump_ip_state = gfx_v9_ip_dump,
+ .print_ip_state = gfx_v9_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
@@ -6750,7 +7407,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
@@ -6774,7 +7431,65 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 7, /* gfx_v9_0_emit_mem_sync */
+ 7 + /* gfx_v9_0_emit_mem_sync */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .insert_nop = gfx_v9_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v9_ring_emit_sb,
+ .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v9_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
+ .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
+ .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ * the first COND_EXEC jump to the place just
+ * prior to this double SWITCH_BUFFER
+ */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 7 + /* gfx_v9_0_emit_mem_sync */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6784,18 +7499,23 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v9_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v9_ring_emit_sb,
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .patch_cntl = gfx_v9_0_ring_patch_cntl,
+ .patch_de = gfx_v9_0_ring_patch_de_meta,
+ .patch_ce = gfx_v9_0_ring_patch_ce_meta,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -6803,7 +7523,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v9_0_ring_get_rptr_compute,
.get_wptr = gfx_v9_0_ring_get_wptr_compute,
.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -6814,11 +7533,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7 + /* gfx_v9_0_emit_mem_sync */
5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
- 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
+ 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6828,13 +7547,18 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v9_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
.emit_wave_limit = gfx_v9_0_emit_wave_limit,
+ .reset = gfx_v9_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v9_0_ring_begin_use_compute,
+ .end_use = gfx_v9_0_ring_end_use_compute,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -6842,7 +7566,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v9_0_ring_get_rptr_compute,
.get_wptr = gfx_v9_0_ring_get_wptr_compute,
.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -6853,7 +7576,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
@@ -6864,17 +7586,23 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
+ }
+
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}
@@ -6889,6 +7617,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
.process = gfx_v9_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
+ .set = gfx_v9_0_set_bad_op_fault_state,
+ .process = gfx_v9_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
.set = gfx_v9_0_set_priv_inst_fault_state,
.process = gfx_v9_0_priv_inst_irq,
@@ -6908,6 +7641,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
@@ -6917,13 +7653,15 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 2):
adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
break;
default:
@@ -6934,38 +7672,50 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asci gds info */
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
adev->gds.gds_size = 0x10000;
break;
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 4, 1):
adev->gds.gds_size = 0x1000;
break;
+ case IP_VERSION(9, 4, 2):
+ /* aldebaran removed all the GDS internal memory,
+ * only support GWS opcode in kernel, like barrier
+ * semaphore.etc */
+ adev->gds.gds_size = 0;
+ break;
default:
adev->gds.gds_size = 0x10000;
break;
}
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA20:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 4, 0):
adev->gds.gds_compute_max_wave_id = 0x7ff;
break;
- case CHIP_VEGA12:
+ case IP_VERSION(9, 2, 1):
adev->gds.gds_compute_max_wave_id = 0x27f;
break;
- case CHIP_RAVEN:
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
if (adev->apu_flags & AMD_APU_IS_RAVEN2)
adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
else
adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
break;
- case CHIP_ARCTURUS:
+ case IP_VERSION(9, 4, 1):
adev->gds.gds_compute_max_wave_id = 0xfff;
break;
+ case IP_VERSION(9, 4, 2):
+ /* deprecated for Aldebaran, no usage at all */
+ adev->gds.gds_compute_max_wave_id = 0;
+ break;
default:
/* this really depends on the chip */
adev->gds.gds_compute_max_wave_id = 0x7ff;
@@ -7032,7 +7782,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
gfx_v9_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
@@ -7049,7 +7799,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
* SE6,SH0 --> bitmap[2][1]
* SE7,SH0 --> bitmap[3][1]
*/
- cu_info->bitmap[i % 4][j + i / 4] = bitmap;
+ cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
@@ -7065,7 +7815,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
index dfe8d4841f58..f9f6edc5e558 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
@@ -27,6 +27,6 @@
extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
- u32 instance);
+ u32 instance, int xcc_id);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
new file mode 100644
index 000000000000..0b6bd09b7529
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/* Define the cleaner shader gfx_9_0 */
+static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = {
+ /* Add the cleaner shader code here */
+};
+
+/* Define the cleaner shader gfx_9_4_2 */
+static const u32 gfx_9_4_2_cleaner_shader_hex[] = {
+ 0xbf068100, 0xbf84003b,
+ 0xbf8a0000, 0xb07c0000,
+ 0xbe8200ff, 0x00000078,
+ 0xbf110802, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0x7e060280, 0x7e080280,
+ 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x80828802,
+ 0xbe803202, 0xbf84fff5,
+ 0xbf9c0000, 0xbe8200ff,
+ 0x80000000, 0x86020102,
+ 0xbf840011, 0xbefe00c1,
+ 0xbeff00c1, 0xd28c0001,
+ 0x0001007f, 0xd28d0001,
+ 0x0002027e, 0x10020288,
+ 0xbe8200bf, 0xbefc00c1,
+ 0xd89c2000, 0x00020201,
+ 0xd89c6040, 0x00040401,
+ 0x320202ff, 0x00000400,
+ 0x80828102, 0xbf84fff8,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbf810000, 0xbf8d0001,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea01ff,
+ 0x000000ee, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index bc699d680ce8..6028afd81690 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -27,7 +27,6 @@
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
-#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"
#include "gc/gc_9_4_1_offset.h"
@@ -863,8 +862,8 @@ static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev,
return 0;
}
-int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
- void *ras_error_status)
+static void gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
uint32_t sec_count = 0, ded_count = 0;
@@ -872,7 +871,7 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
uint32_t reg_value;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
+ return;
err_data->ue_count = 0;
err_data->ce_count = 0;
@@ -903,10 +902,9 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
gfx_v9_4_query_utc_edc_status(adev, err_data);
- return 0;
}
-void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
+static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
{
int i, j, k;
@@ -971,32 +969,11 @@ void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
}
-int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
-{
- struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
- int ret;
- struct ta_ras_trigger_error_input block_info = { 0 };
-
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
-
- block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
- block_info.sub_block_index = info->head.sub_block_index;
- block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
- block_info.address = info->address;
- block_info.value = info->value;
-
- mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return ret;
-}
-
-static const struct soc15_reg_entry gfx_v9_4_rdrsp_status_regs =
- { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };
+static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = {
+ SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32
+};
-void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
+static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
{
uint32_t i, j;
uint32_t reg_value;
@@ -1006,18 +983,37 @@ void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < gfx_v9_4_rdrsp_status_regs.se_num; i++) {
- for (j = 0; j < gfx_v9_4_rdrsp_status_regs.instance;
+ for (i = 0; i < gfx_v9_4_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_ea_err_status_regs.instance;
j++) {
gfx_v9_4_select_se_sh(adev, i, 0, j);
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
- gfx_v9_4_rdrsp_status_regs));
- if (reg_value)
+ gfx_v9_4_ea_err_status_regs));
+ if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ /* SDP read/write error/parity error in FUE_IS_FATAL mode
+ * can cause system fatal error in arcturas. Harvest the error
+ * status before GPU reset */
dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
j, reg_value);
+ }
}
}
gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
}
+
+
+const struct amdgpu_ras_block_hw_ops gfx_v9_4_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
+ .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_ras_ops,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
index 875f18473a98..ca520a767267 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
@@ -24,16 +24,6 @@
#ifndef __GFX_V9_4_H__
#define __GFX_V9_4_H__
-void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev);
-
-int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
- void *ras_error_status);
-
-int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if);
-
-void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev);
-
-void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev);
+extern struct amdgpu_gfx_ras gfx_v9_4_ras;
#endif /* __GFX_V9_4_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
new file mode 100644
index 000000000000..8058ea91ecaf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -0,0 +1,1938 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+#include "gc/gc_9_4_2_offset.h"
+#include "gc/gc_9_4_2_sh_mask.h"
+#include "gfx_v9_0.h"
+
+#include "gfx_v9_4_2.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_gfx.h"
+
+#define SE_ID_MAX 8
+#define CU_ID_MAX 16
+#define SIMD_ID_MAX 4
+#define WAVE_ID_MAX 10
+
+enum gfx_v9_4_2_utc_type {
+ VML2_MEM,
+ VML2_WALKER_MEM,
+ UTCL2_MEM,
+ ATC_L2_CACHE_2M,
+ ATC_L2_CACHE_32K,
+ ATC_L2_CACHE_4K
+};
+
+struct gfx_v9_4_2_utc_block {
+ enum gfx_v9_4_2_utc_type type;
+ uint32_t num_banks;
+ uint32_t num_ways;
+ uint32_t num_mem_blocks;
+ struct soc15_reg idx_reg;
+ struct soc15_reg data_reg;
+ uint32_t sec_count_mask;
+ uint32_t sec_count_shift;
+ uint32_t ded_count_mask;
+ uint32_t ded_count_shift;
+ uint32_t clear;
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde_die_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_0, 0x3fffffff, 0x141dc920),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_1, 0x3fffffff, 0x3b458b93),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_2, 0x3fffffff, 0x1a4f5583),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_3, 0x3fffffff, 0x317717f6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_4, 0x3fffffff, 0x107cc1e6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_5, 0x3ff, 0x351),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde_die_1[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_0, 0x3fffffff, 0x2591aa38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_1, 0x3fffffff, 0xac9e88b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_2, 0x3fffffff, 0x2bc3369b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_3, 0x3fffffff, 0xfb74ee),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_4, 0x3fffffff, 0x21f0a2fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_5, 0x3ff, 0x49),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_UTCL1_CNTL1, 0xffffffff, 0x30800400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
+};
+
+/*
+ * This shader is used to clear VGPRS and LDS, and also write the input
+ * pattern into the write back buffer, which will be used by driver to
+ * check whether all SIMDs have been covered.
+*/
+static const u32 vgpr_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000,
+ 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003,
+ 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006,
+ 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009,
+ 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c,
+ 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f,
+ 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012,
+ 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015,
+ 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018,
+ 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b,
+ 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e,
+ 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021,
+ 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024,
+ 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027,
+ 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a,
+ 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d,
+ 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030,
+ 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033,
+ 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036,
+ 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039,
+ 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c,
+ 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f,
+ 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042,
+ 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045,
+ 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048,
+ 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b,
+ 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e,
+ 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051,
+ 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054,
+ 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057,
+ 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a,
+ 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d,
+ 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060,
+ 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063,
+ 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066,
+ 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069,
+ 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c,
+ 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f,
+ 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072,
+ 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075,
+ 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078,
+ 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b,
+ 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e,
+ 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081,
+ 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084,
+ 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087,
+ 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a,
+ 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d,
+ 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090,
+ 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093,
+ 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096,
+ 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099,
+ 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c,
+ 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f,
+ 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2,
+ 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5,
+ 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8,
+ 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab,
+ 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae,
+ 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1,
+ 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4,
+ 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7,
+ 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba,
+ 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd,
+ 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0,
+ 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3,
+ 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6,
+ 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9,
+ 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc,
+ 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf,
+ 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2,
+ 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5,
+ 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8,
+ 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db,
+ 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de,
+ 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1,
+ 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4,
+ 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7,
+ 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea,
+ 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed,
+ 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0,
+ 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3,
+ 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6,
+ 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9,
+ 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc,
+ 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff,
+ 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280,
+ 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001,
+ 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000,
+ 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201,
+ 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8,
+ 0xbf810000,
+};
+
+const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 }, /* 64KB LDS */
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+/*
+ * The below shaders are used to clear SGPRS, and also write the input
+ * pattern into the write back buffer. The first two dispatch should be
+ * scheduled simultaneously which make sure that all SGPRS could be
+ * allocated, so the dispatch 1 need check write back buffer before scheduled,
+ * make sure that waves of dispatch 0 are all dispacthed to all simds
+ * balanced. both dispatch 0 and dispatch 1 should be halted until all waves
+ * are dispatched, and then driver write a pattern to the shared memory to make
+ * all waves continue.
+*/
+static const u32 sgpr112_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
+ 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
+ 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
+ 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
+ 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
+ 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
+ 0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080,
+ 0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080,
+ 0xbf810000
+};
+
+const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const u32 sgpr96_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
+ 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
+ 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
+ 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
+ 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
+ 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
+ 0xbf810000,
+};
+
+const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+/*
+ * This shader is used to clear the uninitiated sgprs after the above
+ * two dispatches, because of hardware feature, dispath 0 couldn't clear
+ * top hole sgprs. Therefore need 4 waves per SIMD to cover these sgprs
+*/
+static const u32 sgpr64_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000,
+};
+
+const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib,
+ const u32 *shader_ptr, u32 shader_size,
+ const struct soc15_reg_entry *init_regs, u32 regs_size,
+ u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern,
+ struct dma_fence **fence_ptr)
+{
+ int r, i;
+ uint32_t total_size, shader_offset;
+ u64 gpu_addr;
+
+ total_size = (regs_size * 3 + 4 + 5 + 5) * 4;
+ total_size = ALIGN(total_size, 256);
+ shader_offset = total_size;
+ total_size += ALIGN(shader_size, 256);
+
+ /* allocate an indirect buffer to put the commands in */
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ /* load the compute shaders */
+ for (i = 0; i < shader_size/sizeof(u32); i++)
+ ib->ptr[i + (shader_offset / 4)] = shader_ptr[i];
+
+ /* init the ib length to 0 */
+ ib->length_dw = 0;
+
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < regs_size; i++) {
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = init_regs[i].reg_value;
+ }
+
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8;
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr);
+ ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write the wb buffer address */
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3);
+ ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0)
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr);
+ ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr);
+ ib->ptr[ib->length_dw++] = pattern;
+
+ /* write dispatch packet */
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib->ptr[ib->length_dw++] = compute_dim_x; /* x */
+ ib->ptr[ib->length_dw++] = 1; /* y */
+ ib->ptr[ib->length_dw++] = 1; /* z */
+ ib->ptr[ib->length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* shedule the ib on the ring */
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
+ if (r) {
+ dev_err(adev->dev, "ib submit failed (%d).\n", r);
+ amdgpu_ib_free(ib, NULL);
+ }
+ return r;
+}
+
+static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr)
+{
+ uint32_t se, cu, simd, wave;
+ uint32_t offset = 0;
+ char *str;
+ int size;
+
+ str = kmalloc(256, GFP_KERNEL);
+ if (!str)
+ return;
+
+ dev_dbg(adev->dev, "wave assignment:\n");
+
+ for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
+ for (cu = 0; cu < CU_ID_MAX; cu++) {
+ memset(str, 0, 256);
+ size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu);
+ for (simd = 0; simd < SIMD_ID_MAX; simd++) {
+ size += sprintf(str + size, "[");
+ for (wave = 0; wave < WAVE_ID_MAX; wave++) {
+ size += sprintf(str + size, "%x", wb_ptr[offset]);
+ offset++;
+ }
+ size += sprintf(str + size, "] ");
+ }
+ dev_dbg(adev->dev, "%s\n", str);
+ }
+ }
+
+ kfree(str);
+}
+
+static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev,
+ uint32_t *wb_ptr, uint32_t mask,
+ uint32_t pattern, uint32_t num_wave, bool wait)
+{
+ uint32_t se, cu, simd, wave;
+ uint32_t loop = 0;
+ uint32_t wave_cnt;
+ uint32_t offset;
+
+ do {
+ wave_cnt = 0;
+ offset = 0;
+
+ for (se = 0; se < adev->gfx.config.max_shader_engines; se++)
+ for (cu = 0; cu < CU_ID_MAX; cu++)
+ for (simd = 0; simd < SIMD_ID_MAX; simd++)
+ for (wave = 0; wave < WAVE_ID_MAX; wave++) {
+ if (((1 << wave) & mask) &&
+ (wb_ptr[offset] == pattern))
+ wave_cnt++;
+
+ offset++;
+ }
+
+ if (wave_cnt == num_wave)
+ return 0;
+
+ mdelay(1);
+ } while (++loop < 2000 && wait);
+
+ dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n",
+ wave_cnt, num_wave);
+
+ gfx_v9_4_2_log_wave_assignment(adev, wb_ptr);
+
+ return -EBADSLT;
+}
+
+static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
+{
+ int r;
+ int wb_size = adev->gfx.config.max_shader_engines *
+ CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
+ struct amdgpu_ib wb_ib;
+ struct amdgpu_ib disp_ibs[3];
+ struct dma_fence *fences[3];
+ u32 pattern[3] = { 0x1, 0x5, 0xa };
+
+ /* bail if the compute ring is not ready */
+ if (!adev->gfx.compute_ring[0].sched.ready ||
+ !adev->gfx.compute_ring[1].sched.ready)
+ return 0;
+
+ /* allocate the write-back buffer from IB */
+ memset(&wb_ib, 0, sizeof(wb_ib));
+ r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
+ AMDGPU_IB_POOL_DIRECT, &wb_ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d) for wb\n", r);
+ return r;
+ }
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ibs[0],
+ sgpr112_init_compute_shader_aldebaran,
+ sizeof(sgpr112_init_compute_shader_aldebaran),
+ sgpr112_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr112_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern[0], &fences[0]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear first 224 sgprs\n");
+ goto pro_end;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b11,
+ pattern[0],
+ adev->gfx.cu_info.number * SIMD_ID_MAX * 2,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 224 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp0_failed;
+ }
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[1],
+ &disp_ibs[1],
+ sgpr96_init_compute_shader_aldebaran,
+ sizeof(sgpr96_init_compute_shader_aldebaran),
+ sgpr96_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr96_init_regs_aldebaran),
+ adev->gfx.cu_info.number * 2,
+ wb_ib.gpu_addr, pattern[1], &fences[1]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear next 576 sgprs\n");
+ goto disp0_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b11111100,
+ pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 576 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp1_failed;
+ }
+
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(fences[0], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 224 sgprs\n");
+ goto disp1_failed;
+ }
+
+ r = dma_fence_wait(fences[1], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 576 sgprs\n");
+ goto disp1_failed;
+ }
+
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ibs[2],
+ sgpr64_init_compute_shader_aldebaran,
+ sizeof(sgpr64_init_compute_shader_aldebaran),
+ sgpr64_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr64_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern[2], &fences[2]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear first 256 sgprs\n");
+ goto disp1_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b1111,
+ pattern[2],
+ adev->gfx.cu_info.number * SIMD_ID_MAX * 4,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp2_failed;
+ }
+
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+ r = dma_fence_wait(fences[2], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 256 sgprs\n");
+ goto disp2_failed;
+ }
+
+disp2_failed:
+ amdgpu_ib_free(&disp_ibs[2], NULL);
+ dma_fence_put(fences[2]);
+disp1_failed:
+ amdgpu_ib_free(&disp_ibs[1], NULL);
+ dma_fence_put(fences[1]);
+disp0_failed:
+ amdgpu_ib_free(&disp_ibs[0], NULL);
+ dma_fence_put(fences[0]);
+pro_end:
+ amdgpu_ib_free(&wb_ib, NULL);
+
+ if (r)
+ dev_info(adev->dev, "Init SGPRS Failed\n");
+ else
+ dev_info(adev->dev, "Init SGPRS Successfully\n");
+
+ return r;
+}
+
+static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
+{
+ int r;
+ /* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0 ~ 9 */
+ int wb_size = adev->gfx.config.max_shader_engines *
+ CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
+ struct amdgpu_ib wb_ib;
+ struct amdgpu_ib disp_ib;
+ struct dma_fence *fence;
+ u32 pattern = 0xa;
+
+ /* bail if the compute ring is not ready */
+ if (!adev->gfx.compute_ring[0].sched.ready)
+ return 0;
+
+ /* allocate the write-back buffer from IB */
+ memset(&wb_ib, 0, sizeof(wb_ib));
+ r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
+ AMDGPU_IB_POOL_DIRECT, &wb_ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r);
+ return r;
+ }
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ib,
+ vgpr_init_compute_shader_aldebaran,
+ sizeof(vgpr_init_compute_shader_aldebaran),
+ vgpr_init_regs_aldebaran,
+ ARRAY_SIZE(vgpr_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern, &fence);
+ if (r) {
+ dev_err(adev->dev, "failed to clear vgprs\n");
+ goto pro_end;
+ }
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(fence, false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear vgprs\n");
+ goto disp_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b1,
+ pattern,
+ adev->gfx.cu_info.number * SIMD_ID_MAX,
+ false);
+ if (r) {
+ dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n");
+ goto disp_failed;
+ }
+
+disp_failed:
+ amdgpu_ib_free(&disp_ib, NULL);
+ dma_fence_put(fence);
+pro_end:
+ amdgpu_ib_free(&wb_ib, NULL);
+
+ if (r)
+ dev_info(adev->dev, "Init VGPRS Failed\n");
+ else
+ dev_info(adev->dev, "Init VGPRS Successfully\n");
+
+ return r;
+}
+
+int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
+ /* Workaround for ALDEBARAN, skip GPRs init in GPU reset.
+ Will remove it once GPRs init algorithm works for all CU settings. */
+ if (amdgpu_in_reset(adev))
+ return 0;
+
+ gfx_v9_4_2_do_sgprs_init(adev);
+
+ gfx_v9_4_2_do_vgprs_init(adev);
+
+ return 0;
+}
+
+static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
+static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev);
+
+void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
+ uint32_t die_id)
+{
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_4_2_alde,
+ ARRAY_SIZE(golden_settings_gc_9_4_2_alde));
+
+ /* apply golden settings per die */
+ switch (die_id) {
+ case 0:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_4_2_alde_die_0,
+ ARRAY_SIZE(golden_settings_gc_9_4_2_alde_die_0));
+ break;
+ case 1:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_4_2_alde_die_1,
+ ARRAY_SIZE(golden_settings_gc_9_4_2_alde_die_1));
+ break;
+ default:
+ dev_warn(adev->dev,
+ "invalid die id %d, ignore channel fabricid remap settings\n",
+ die_id);
+ break;
+ }
+}
+
+void gfx_v9_4_2_init_sq(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ if (adev->gfx.mec_fw_version >= 98) {
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ data = RREG32_SOC15(GC, 0, regSQ_CONFIG1);
+ data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regSQ_CONFIG1, data);
+ }
+}
+
+void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ int i;
+
+ mutex_lock(&adev->srbm_mutex);
+
+ for (i = first_vmid; i < last_vmid; i++) {
+ data = 0;
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE,
+ 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL), data);
+ }
+
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA1), 0);
+}
+
+void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL, PATTERN_MODE, 1);
+ WREG32_SOC15(GC, 0, regGC_THROTTLE_CTRL, tmp);
+
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL1, PWRBRK_STALL_EN, 1);
+ WREG32_SOC15(GC, 0, regGC_THROTTLE_CTRL1, tmp);
+
+ WREG32_SOC15(GC, 0, regGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL);
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, PWRBRK_STALL_PATTERN_CTRL, PWRBRK_END_STEP, 0x12);
+ WREG32_SOC15(GC, 0, regGC_CAC_IND_DATA, tmp);
+}
+
+static const struct soc15_reg_entry gfx_v9_4_2_edc_counter_regs[] = {
+ /* CPF */
+ { SOC15_REG_ENTRY(GC, 0, regCPF_EDC_ROQ_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCPF_EDC_TAG_CNT), 0, 1, 1 },
+ /* CPC */
+ { SOC15_REG_ENTRY(GC, 0, regCPC_EDC_SCRATCH_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCPC_EDC_UCODE_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regDC_EDC_STATE_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regDC_EDC_CSINVOC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regDC_EDC_RESTORE_CNT), 0, 1, 1 },
+ /* GDS */
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_GRBM_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PHY_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT), 0, 1, 1 },
+ /* RLC */
+ { SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2), 0, 1, 1 },
+ /* SPI */
+ { SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT), 0, 8, 1 },
+ /* SQC */
+ { SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT), 0, 8, 7 },
+ { SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2), 0, 8, 7 },
+ { SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3), 0, 8, 7 },
+ { SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3), 0, 8, 7 },
+ /* SQ */
+ { SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT), 0, 8, 14 },
+ /* TCP */
+ { SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW), 0, 8, 14 },
+ /* TCI */
+ { SOC15_REG_ENTRY(GC, 0, regTCI_EDC_CNT), 0, 1, 69 },
+ /* TCC */
+ { SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2), 0, 1, 16 },
+ /* TCA */
+ { SOC15_REG_ENTRY(GC, 0, regTCA_EDC_CNT), 0, 1, 2 },
+ /* TCX */
+ { SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT), 0, 1, 2 },
+ { SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2), 0, 1, 2 },
+ /* TD */
+ { SOC15_REG_ENTRY(GC, 0, regTD_EDC_CNT), 0, 8, 14 },
+ /* TA */
+ { SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT), 0, 8, 14 },
+ /* GCEA */
+ { SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 1, 16 },
+};
+
+static void gfx_v9_4_2_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data);
+}
+
+static const struct soc15_ras_field_entry gfx_v9_4_2_ras_fields[] = {
+ /* CPF */
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, regCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME2),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME2) },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, regCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME1) },
+ { "CPF_TCIU_TAG", SOC15_REG_ENTRY(GC, 0, regCPF_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) },
+
+ /* CPC */
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, regCPC_EDC_SCRATCH_CNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, regCPC_EDC_UCODE_CNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) },
+ { "CPC_DC_STATE_RAM_ME1", SOC15_REG_ENTRY(GC, 0, regDC_EDC_STATE_CNT),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, regDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_RESTORE_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, regDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, regDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT1_ME1) },
+ { "CPC_DC_RESTORE_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, regDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT1_ME1) },
+
+ /* GDS */
+ { "GDS_GRBM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_GRBM_CNT),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, SEC),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, DED) },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) },
+ { "GDS_PHY_CMD_RAM_MEM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
+ { "GDS_PHY_DATA_RAM_MEM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_DED) },
+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE0_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE1_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE2_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE3_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
+ { "GDS_ME0_GFXHP3D_PIX_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME0_GFXHP3D_PIX_DED) },
+ { "GDS_ME0_GFXHP3D_VTX_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME0_GFXHP3D_VTX_DED) },
+ { "GDS_ME0_CS_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME0_CS_DED) },
+ { "GDS_ME0_GFXHP3D_GS_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME0_GFXHP3D_GS_DED) },
+ { "GDS_ME1_PIPE0_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME1_PIPE0_DED) },
+ { "GDS_ME1_PIPE1_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME1_PIPE1_DED) },
+ { "GDS_ME1_PIPE2_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME1_PIPE2_DED) },
+ { "GDS_ME1_PIPE3_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME1_PIPE3_DED) },
+ { "GDS_ME2_PIPE0_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME2_PIPE0_DED) },
+ { "GDS_ME2_PIPE1_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME2_PIPE1_DED) },
+ { "GDS_ME2_PIPE2_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME2_PIPE2_DED) },
+ { "GDS_ME2_PIPE3_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME2_PIPE3_DED) },
+
+ /* RLC */
+ { "RLCG_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_DED_COUNT) },
+ { "RLCG_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_DED_COUNT) },
+ { "RLCV_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_DED_COUNT) },
+ { "RLCV_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_TCTAG_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_DED_COUNT) },
+ { "RLC_SPM_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SRM_DATA_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_DED_COUNT) },
+ { "RLC_SRM_ADDR_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_DED_COUNT) },
+ { "RLC_SPM_SE0_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE1_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE2_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE3_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE4_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE5_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE6_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE7_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT) },
+
+ /* SPI */
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_DED_COUNT) },
+ { "SPI_GDS_EXPREQ", SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_DED_COUNT) },
+ { "SPI_WB_GRANT_30", SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_DED_COUNT) },
+ { "SPI_LIFE_CNT", SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_DED_COUNT) },
+
+ /* SQC - regSQC_EDC_CNT */
+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU3_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU3_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU3_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU3_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU3_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU3_UTCL1_LFIFO_DED_COUNT) },
+
+ /* SQC - regSQC_EDC_CNT2 */
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_DED_COUNT) },
+
+ /* SQC - regSQC_EDC_CNT3 */
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_DED_COUNT) },
+
+ /* SQC - regSQC_EDC_PARITY_CNT3 */
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_DED_COUNT) },
+
+ /* SQ */
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) },
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) },
+
+ /* TCP */
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_DED_COUNT) },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_DED_COUNT) },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_DED_COUNT) },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
+
+ /* TCI */
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, regTCI_EDC_CNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_DED_COUNT) },
+
+ /* TCC */
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_DED_COUNT) },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_DED_COUNT) },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_DED_COUNT) },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_DED_COUNT) },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_DED_COUNT) },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_DED_COUNT) },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_DED_COUNT) },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_DED_COUNT) },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_DED_COUNT) },
+
+ /* TCA */
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, regTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_DED_COUNT) },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, regTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_DED_COUNT) },
+
+ /* TCX */
+ { "TCX_GROUP0", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP0_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP0_DED_COUNT) },
+ { "TCX_GROUP1", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP1_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP1_DED_COUNT) },
+ { "TCX_GROUP2", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP2_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP2_DED_COUNT) },
+ { "TCX_GROUP3", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP3_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP3_DED_COUNT) },
+ { "TCX_GROUP4", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP4_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP4_DED_COUNT) },
+ { "TCX_GROUP5", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP5_SED_COUNT), 0, 0 },
+ { "TCX_GROUP6", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP6_SED_COUNT), 0, 0 },
+ { "TCX_GROUP7", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP7_SED_COUNT), 0, 0 },
+ { "TCX_GROUP8", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP8_SED_COUNT), 0, 0 },
+ { "TCX_GROUP9", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP9_SED_COUNT), 0, 0 },
+ { "TCX_GROUP10", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP10_SED_COUNT), 0, 0 },
+ { "TCX_GROUP11", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2),
+ SOC15_REG_FIELD(TCX_EDC_CNT2, GROUP11_SED_COUNT), 0, 0 },
+ { "TCX_GROUP12", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2),
+ SOC15_REG_FIELD(TCX_EDC_CNT2, GROUP12_SED_COUNT), 0, 0 },
+ { "TCX_GROUP13", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2),
+ SOC15_REG_FIELD(TCX_EDC_CNT2, GROUP13_SED_COUNT), 0, 0 },
+ { "TCX_GROUP14", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2),
+ SOC15_REG_FIELD(TCX_EDC_CNT2, GROUP14_SED_COUNT), 0, 0 },
+
+ /* TD */
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, regTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, regTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, regTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_DED_COUNT) },
+
+ /* TA */
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
+ { "TA_FS_AFIFO_LO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_LO_DED_COUNT) },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_DED_COUNT) },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_DED_COUNT) },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_DED_COUNT) },
+ { "TA_FS_AFIFO_HI", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_HI_DED_COUNT) },
+
+ /* EA - regGCEA_EDC_CNT */
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_DED_COUNT) },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0, 0 },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0, 0 },
+
+ /* EA - regGCEA_EDC_CNT2 */
+ { "EA_GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_DED_COUNT) },
+ { "EA_MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_DED_COUNT) },
+ { "EA_MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_DED_COUNT) },
+ { "EA_MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_DED_COUNT) },
+
+ /* EA - regGCEA_EDC_CNT3 */
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT) },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT) },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IORD_CMDMEM_DED_COUNT) },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_CMDMEM_DED_COUNT) },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT) },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT) },
+ { "EA_MAM_A0MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_DED_COUNT) },
+ { "EA_MAM_A1MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_DED_COUNT) },
+ { "EA_MAM_A2MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_DED_COUNT) },
+ { "EA_MAM_A3MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_DED_COUNT) },
+ { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_DED_COUNT) },
+};
+
+static const char * const vml2_walker_mems[] = {
+ "UTC_VML2_CACHE_PDE0_MEM0",
+ "UTC_VML2_CACHE_PDE0_MEM1",
+ "UTC_VML2_CACHE_PDE1_MEM0",
+ "UTC_VML2_CACHE_PDE1_MEM1",
+ "UTC_VML2_CACHE_PDE2_MEM0",
+ "UTC_VML2_CACHE_PDE2_MEM1",
+ "UTC_VML2_RDIF_ARADDRS",
+ "UTC_VML2_RDIF_LOG_FIFO",
+ "UTC_VML2_QUEUE_REQ",
+ "UTC_VML2_QUEUE_RET",
+};
+
+static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
+ { VML2_MEM, 8, 2, 2,
+ { SOC15_REG_ENTRY(GC, 0, regVML2_MEM_ECC_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regVML2_MEM_ECC_CNTL) },
+ SOC15_REG_FIELD(VML2_MEM_ECC_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(VML2_MEM_ECC_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, VML2_MEM_ECC_CNTL, WRITE_COUNTERS, 1) },
+ { VML2_WALKER_MEM, ARRAY_SIZE(vml2_walker_mems), 1, 1,
+ { SOC15_REG_ENTRY(GC, 0, regVML2_WALKER_MEM_ECC_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regVML2_WALKER_MEM_ECC_CNTL) },
+ SOC15_REG_FIELD(VML2_WALKER_MEM_ECC_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(VML2_WALKER_MEM_ECC_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, VML2_WALKER_MEM_ECC_CNTL, WRITE_COUNTERS, 1) },
+ { UTCL2_MEM, 18, 1, 2,
+ { SOC15_REG_ENTRY(GC, 0, regUTCL2_MEM_ECC_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regUTCL2_MEM_ECC_CNTL) },
+ SOC15_REG_FIELD(UTCL2_MEM_ECC_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(UTCL2_MEM_ECC_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, UTCL2_MEM_ECC_CNTL, WRITE_COUNTERS, 1) },
+ { ATC_L2_CACHE_2M, 8, 2, 1,
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_2M_DSM_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_2M_DSM_CNTL) },
+ SOC15_REG_FIELD(ATC_L2_CACHE_2M_DSM_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(ATC_L2_CACHE_2M_DSM_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, ATC_L2_CACHE_2M_DSM_CNTL, WRITE_COUNTERS, 1) },
+ { ATC_L2_CACHE_32K, 8, 2, 2,
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_32K_DSM_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_32K_DSM_CNTL) },
+ SOC15_REG_FIELD(ATC_L2_CACHE_32K_DSM_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(ATC_L2_CACHE_32K_DSM_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, ATC_L2_CACHE_32K_DSM_CNTL, WRITE_COUNTERS, 1) },
+ { ATC_L2_CACHE_4K, 8, 2, 8,
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_4K_DSM_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_4K_DSM_CNTL) },
+ SOC15_REG_FIELD(ATC_L2_CACHE_4K_DSM_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(ATC_L2_CACHE_4K_DSM_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
+};
+
+static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = {
+ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
+};
+
+static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id,
+ uint32_t value, uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_2_ras_fields); i++) {
+ if (gfx_v9_4_2_ras_fields[i].reg_offset != reg->reg_offset ||
+ gfx_v9_4_2_ras_fields[i].seg != reg->seg ||
+ gfx_v9_4_2_ras_fields[i].inst != reg->inst)
+ continue;
+
+ sec_cnt = SOC15_RAS_REG_FIELD_VAL(
+ value, gfx_v9_4_2_ras_fields[i], sec);
+ if (sec_cnt) {
+ dev_info(adev->dev,
+ "GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ gfx_v9_4_2_ras_fields[i].name, se_id, inst_id,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = SOC15_RAS_REG_FIELD_VAL(
+ value, gfx_v9_4_2_ras_fields[i], ded);
+ if (ded_cnt) {
+ dev_info(adev->dev,
+ "GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ gfx_v9_4_2_ras_fields[i].name, se_id, inst_id,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_2_query_sram_edc_count(struct amdgpu_device *adev,
+ uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i, j, k, data;
+ uint32_t sec_cnt = 0, ded_cnt = 0;
+
+ if (sec_count && ded_count) {
+ *sec_count = 0;
+ *ded_count = 0;
+ }
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_2_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_4_2_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_2_edc_counter_regs[i].instance;
+ k++) {
+ gfx_v9_4_2_select_se_sh(adev, j, 0, k);
+
+ /* if sec/ded_count is null, just clear counter */
+ if (!sec_count || !ded_count) {
+ WREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_edc_counter_regs[i]), 0);
+ continue;
+ }
+
+ data = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_edc_counter_regs[i]));
+
+ if (!data)
+ continue;
+
+ gfx_v9_4_2_get_reg_error_count(adev,
+ &gfx_v9_4_2_edc_counter_regs[i],
+ j, k, data, &sec_cnt, &ded_cnt);
+
+ /* clear counter after read */
+ WREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_edc_counter_regs[i]), 0);
+ }
+ }
+ }
+
+ if (sec_count && ded_count) {
+ *sec_count += sec_cnt;
+ *ded_count += ded_cnt;
+ }
+
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
+ struct gfx_v9_4_2_utc_block *blk,
+ uint32_t instance, uint32_t sec_cnt,
+ uint32_t ded_cnt)
+{
+ uint32_t bank, way, mem;
+ static const char * const vml2_way_str[] = { "BIGK", "4K" };
+ static const char * const utcl2_router_str[] = { "VMC", "APT" };
+
+ mem = instance % blk->num_mem_blocks;
+ way = (instance / blk->num_mem_blocks) % blk->num_ways;
+ bank = instance / (blk->num_mem_blocks * blk->num_ways);
+
+ switch (blk->type) {
+ case VML2_MEM:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTC_VML2_BANK_CACHE_%d_%s_MEM%d, SED %d, DED %d\n",
+ bank, vml2_way_str[way], mem, sec_cnt, ded_cnt);
+ break;
+ case VML2_WALKER_MEM:
+ dev_info(adev->dev, "GFX SubBlock %s, SED %d, DED %d\n",
+ vml2_walker_mems[bank], sec_cnt, ded_cnt);
+ break;
+ case UTCL2_MEM:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n",
+ bank, utcl2_router_str[mem], sec_cnt, ded_cnt);
+ break;
+ case ATC_L2_CACHE_2M:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTC_ATCL2_CACHE_2M_BANK%d_WAY%d_MEM, SED %d, DED %d\n",
+ bank, way, sec_cnt, ded_cnt);
+ break;
+ case ATC_L2_CACHE_32K:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTC_ATCL2_CACHE_32K_BANK%d_WAY%d_MEM%d, SED %d, DED %d\n",
+ bank, way, mem, sec_cnt, ded_cnt);
+ break;
+ case ATC_L2_CACHE_4K:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTC_ATCL2_CACHE_4K_BANK%d_WAY%d_MEM%d, SED %d, DED %d\n",
+ bank, way, mem, sec_cnt, ded_cnt);
+ break;
+ }
+}
+
+static int gfx_v9_4_2_query_utc_edc_count(struct amdgpu_device *adev,
+ uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i, j, data;
+ uint32_t sec_cnt, ded_cnt;
+ uint32_t num_instances;
+ struct gfx_v9_4_2_utc_block *blk;
+
+ if (sec_count && ded_count) {
+ *sec_count = 0;
+ *ded_count = 0;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_2_utc_blocks); i++) {
+ blk = &gfx_v9_4_2_utc_blocks[i];
+ num_instances =
+ blk->num_banks * blk->num_ways * blk->num_mem_blocks;
+ for (j = 0; j < num_instances; j++) {
+ WREG32(SOC15_REG_ENTRY_OFFSET(blk->idx_reg), j);
+
+ /* if sec/ded_count is NULL, just clear counter */
+ if (!sec_count || !ded_count) {
+ WREG32(SOC15_REG_ENTRY_OFFSET(blk->data_reg),
+ blk->clear);
+ continue;
+ }
+
+ data = RREG32(SOC15_REG_ENTRY_OFFSET(blk->data_reg));
+ if (!data)
+ continue;
+
+ sec_cnt = SOC15_RAS_REG_FIELD_VAL(data, *blk, sec);
+ *sec_count += sec_cnt;
+ ded_cnt = SOC15_RAS_REG_FIELD_VAL(data, *blk, ded);
+ *ded_count += ded_cnt;
+
+ /* clear counter after read */
+ WREG32(SOC15_REG_ENTRY_OFFSET(blk->data_reg),
+ blk->clear);
+
+ /* print the edc count */
+ if (sec_cnt || ded_cnt)
+ gfx_v9_4_2_log_utc_edc_count(adev, blk, j, sec_cnt,
+ ded_cnt);
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ gfx_v9_4_2_query_sram_edc_count(adev, &sec_count, &ded_count);
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+ gfx_v9_4_2_query_utc_edc_count(adev, &sec_count, &ded_count);
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+}
+
+static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS, 0x3);
+ WREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS, 0x3);
+ WREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS, 0x3);
+}
+
+static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
+{
+ uint32_t i, j;
+ uint32_t value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
+ j++) {
+ gfx_v9_4_2_select_se_sh(adev, i, 0, j);
+ value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_ea_err_status_regs));
+ value = REG_SET_FIELD(value, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1);
+ WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value);
+ }
+ }
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ gfx_v9_4_2_query_sram_edc_count(adev, NULL, NULL);
+ gfx_v9_4_2_query_utc_edc_count(adev, NULL, NULL);
+}
+
+static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)
+{
+ uint32_t i, j;
+ uint32_t reg_value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
+ j++) {
+ gfx_v9_4_2_select_se_sh(adev, i, 0, j);
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_ea_err_status_regs));
+
+ if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
+ j, reg_value);
+ }
+ /* clear after read */
+ reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x1);
+ WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), reg_value);
+ }
+ }
+
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_2_query_utc_err_status(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS, 0x3);
+ }
+
+ data = RREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS, 0x3);
+ }
+
+ data = RREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS, 0x3);
+ }
+}
+
+static void gfx_v9_4_2_query_ras_error_status(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ gfx_v9_4_2_query_ea_err_status(adev);
+ gfx_v9_4_2_query_utc_err_status(adev);
+ gfx_v9_4_2_query_sq_timeout_status(adev);
+}
+
+static void gfx_v9_4_2_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ gfx_v9_4_2_reset_utc_err_status(adev);
+ gfx_v9_4_2_reset_ea_err_status(adev);
+ gfx_v9_4_2_reset_sq_timeout_status(adev);
+}
+
+static void gfx_v9_4_2_enable_watchdog_timer(struct amdgpu_device *adev)
+{
+ uint32_t i;
+ uint32_t data;
+
+ data = REG_SET_FIELD(0, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
+ amdgpu_watchdog_timer.timeout_fatal_disable ? 1 :
+ 0);
+
+ if (amdgpu_watchdog_timer.timeout_fatal_disable &&
+ (amdgpu_watchdog_timer.period < 1 ||
+ amdgpu_watchdog_timer.period > 0x23)) {
+ dev_warn(adev->dev, "Watchdog period range is 1 to 0x23\n");
+ amdgpu_watchdog_timer.period = 0x23;
+ }
+ data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, PERIOD_SEL,
+ amdgpu_watchdog_timer.period);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ gfx_v9_4_2_select_se_sh(adev, i, 0xffffffff, 0xffffffff);
+ WREG32_SOC15(GC, 0, regSQ_TIMEOUT_CONFIG, data);
+ }
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15_RLC_EX(reg, GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void gfx_v9_4_2_log_cu_timeout_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ uint32_t i, simd, wave;
+ uint32_t wave_status;
+ uint32_t wave_pc_lo, wave_pc_hi;
+ uint32_t wave_exec_lo, wave_exec_hi;
+ uint32_t wave_inst_dw0, wave_inst_dw1;
+ uint32_t wave_ib_sts;
+
+ for (i = 0; i < 32; i++) {
+ if (!((i << 1) & status))
+ continue;
+
+ simd = i / cu_info->max_waves_per_simd;
+ wave = i % cu_info->max_waves_per_simd;
+
+ wave_status = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+ wave_pc_lo = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+ wave_pc_hi = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+ wave_exec_lo =
+ wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+ wave_exec_hi =
+ wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+ wave_inst_dw0 =
+ wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+ wave_inst_dw1 =
+ wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+ wave_ib_sts = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+
+ dev_info(
+ adev->dev,
+ "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n",
+ simd, wave, wave_status,
+ ((uint64_t)wave_pc_hi << 32 | wave_pc_lo),
+ ((uint64_t)wave_exec_hi << 32 | wave_exec_lo),
+ ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0),
+ wave_ib_sts);
+ }
+}
+
+static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev)
+{
+ uint32_t se_idx, sh_idx, cu_idx;
+ uint32_t status;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines;
+ se_idx++) {
+ for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se;
+ sh_idx++) {
+ for (cu_idx = 0;
+ cu_idx < adev->gfx.config.max_cu_per_sh;
+ cu_idx++) {
+ gfx_v9_4_2_select_se_sh(adev, se_idx, sh_idx,
+ cu_idx);
+ status = RREG32_SOC15(GC, 0,
+ regSQ_TIMEOUT_STATUS);
+ if (status != 0) {
+ dev_info(
+ adev->dev,
+ "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n",
+ se_idx, sh_idx, cu_idx);
+ gfx_v9_4_2_log_cu_timeout_status(
+ adev, status);
+ }
+ /* clear old status */
+ WREG32_SOC15(GC, 0, regSQ_TIMEOUT_STATUS, 0);
+ }
+ }
+ }
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
+{
+ uint32_t se_idx, sh_idx, cu_idx;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines;
+ se_idx++) {
+ for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se;
+ sh_idx++) {
+ for (cu_idx = 0;
+ cu_idx < adev->gfx.config.max_cu_per_sh;
+ cu_idx++) {
+ gfx_v9_4_2_select_se_sh(adev, se_idx, sh_idx,
+ cu_idx);
+ WREG32_SOC15(GC, 0, regSQ_TIMEOUT_STATUS, 0);
+ }
+ }
+ }
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+
+
+struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
+ .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
+ .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_2_ras_ops,
+ },
+ .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
new file mode 100644
index 000000000000..a603724c1dfc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_2_H__
+#define __GFX_V9_4_2_H__
+
+void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid, uint32_t last_vmid);
+void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
+ uint32_t die_id);
+void gfx_v9_4_2_init_sq(struct amdgpu_device *adev);
+void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
+int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);
+
+extern struct amdgpu_gfx_ras gfx_v9_4_2_ras;
+
+#endif /* __GFX_V9_4_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm
new file mode 100644
index 000000000000..35b8cf9070bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for complitaion change this to main and lower shader main to main_1
+
+// MI200 : Clear SGPRs, VGPRs and LDS
+// Uses two kernels launched separately:
+// 1. Clean VGPRs, LDS, and lower SGPRs
+// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+// Waves in the workgroup share the 64KB of LDS
+// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+// Each wave clears 128 VGPRs, so all 512 in the SIMD
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+// 2. Clean remaining SGPRs
+// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+// Waves are allocating 96 SGPRs
+// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command
+// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
+
+shader main
+ asic(MI200)
+ type(CS)
+ wave_size(64)
+// Note: original source code from SQ team
+
+// (theorhetical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s3 == 1)
+ S_BARRIER
+
+ s_movk_i32 m0, 0x0000
+ s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance)
+ //
+ // CLEAR VGPRs
+ //
+ s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing
+label_0005:
+ v_mov_b32 v0, 0
+ v_mov_b32 v1, 0
+ v_mov_b32 v2, 0
+ v_mov_b32 v3, 0
+ v_mov_b32 v4, 0
+ v_mov_b32 v5, 0
+ v_mov_b32 v6, 0
+ v_mov_b32 v7, 0
+ s_sub_u32 s2, s2, 8
+ s_set_gpr_idx_idx s2
+ s_cbranch_scc0 label_0005
+ s_set_gpr_idx_off
+
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_clean_sgpr_1:
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+ s_sethalt 1
+
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop1:
+
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop1
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0xee //clear vcc
+
+s_endpgm
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
new file mode 100644
index 000000000000..cbb74ffc4792
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -0,0 +1,5062 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "vega10_enum.h"
+
+#include "v9_structs.h"
+
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+
+#include "gfx_v9_4_3.h"
+#include "gfx_v9_4_3_cleaner_shader.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_aca.h"
+
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
+
+#define GFX9_MEC_HPD_SIZE 4096
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
+#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
+
+#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */
+#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */
+#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */
+#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */
+
+#define NORMALIZE_XCC_REG_OFFSET(offset) \
+ (offset & 0xFFFF)
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_ICACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQ_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regTCP_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = {
+ /* compute queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_3_ras;
+
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev);
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+
+static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ /* vmid_mask:0* queue_type:0 (KIQ) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring,
+ upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_4_3_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+ /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_4_3_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v9_4_3_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+
+ /* enter save mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n\n", queue_type);
+ }
+
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
+
+static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v9_4_3_kiq_set_resources,
+ .kiq_map_queues = gfx_v9_4_3_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues,
+ .kiq_query_status = gfx_v9_4_3_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v9_4_3_kiq_reset_hw_queue,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs;
+}
+
+static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
+{
+ int i, num_xcc, dev_inst;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ dev_inst = GET_INST(GC, i);
+
+ WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG,
+ GOLDEN_GB_ADDR_CONFIG);
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, SPARE, 0x1);
+ }
+}
+
+static uint32_t gfx_v9_4_3_normalize_xcc_reg_offset(uint32_t reg)
+{
+ uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg);
+
+ /* If it is an XCC reg, normalize the reg to keep
+ lower 16 bits in local xcc */
+
+ if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) ||
+ ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH)))
+ return normalized_reg;
+ else
+ return reg;
+}
+
+static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) |
+ (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ /* Only do the normalization on regspace */
+ if (mem_space == 0) {
+ addr0 = gfx_v9_4_3_normalize_xcc_reg_offset(addr0);
+ addr1 = gfx_v9_4_3_normalize_xcc_reg_offset(addr1);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring)
+{
+ uint32_t scratch_reg0_offset, xcc_offset;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ /* Use register offset which is local to XCC in the packet */
+ xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
+ WREG32(scratch_reg0_offset, 0xCAFEDEAD);
+ tmp = RREG32(scratch_reg0_offset);
+
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, xcc_offset - PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch_reg0_offset);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err1;
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ tmp = adev->wb.wb[index];
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err2:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/* This value might differs per partition */
+static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+
+ return clock;
+}
+
+static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
+{
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+
+ return err;
+}
+
+static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
+{
+ int err;
+
+ if (amdgpu_sriov_vf(adev)) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
+
+ if (err)
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
+ } else
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+
+ adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
+ adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
+
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ return err;
+}
+
+static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev)
+{
+ char ucode_prefix[15];
+ int r;
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
+{
+ int r, i, num_xcc;
+ u32 *hpd;
+ const __le32 *fw_data;
+ unsigned fw_size;
+ u32 *fw;
+ size_t mec_hpd_size;
+
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap,
+ AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size =
+ adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v9_4_3_mec_fini(adev);
+ return r;
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ for (i = 0; i < mec_hpd_size / 4; i++) {
+ memset((void *)(hpd + i), 0, 4);
+ if (i % 50 == 0)
+ msleep(1);
+ }
+ } else {
+ memset(hpd, 0, mec_hpd_size);
+ }
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
+ gfx_v9_4_3_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_INDEX, instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data);
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
+}
+
+static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev,
+ uint32_t xcc_id, uint32_t simd, uint32_t wave,
+ uint32_t *dst, int *no_fields)
+{
+ /* type 1 wave data */
+ dst[(*no_fields)++] = 1;
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(adev, xcc_id, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(adev, xcc_id, simd, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
+}
+
+static int gfx_v9_4_3_get_xccs_per_xcp(struct amdgpu_device *adev)
+{
+ u32 xcp_ctl;
+
+ /* Value is expected to be the same on all, fetch from first instance */
+ xcp_ctl = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HYP_XCP_CTL);
+
+ return REG_GET_FIELD(xcp_ctl, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP);
+}
+
+static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
+ int num_xccs_per_xcp)
+{
+ int ret, i, num_xcc;
+ u32 tmp = 0;
+
+ if (adev->psp.funcs) {
+ ret = psp_spatial_partition(&adev->psp,
+ NUM_XCC(adev->gfx.xcc_mask) /
+ num_xccs_per_xcp);
+ if (ret)
+ return ret;
+ } else {
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ for (i = 0; i < num_xcc; i++) {
+ tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP,
+ num_xccs_per_xcp);
+ tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID,
+ i % num_xccs_per_xcp);
+ WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL,
+ tmp);
+ }
+ ret = 0;
+ }
+
+ adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp;
+
+ return ret;
+}
+
+static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
+{
+ int xcc;
+
+ xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0));
+ if (!xcc) {
+ dev_err(adev->dev, "Couldn't find xcc mapping from IH node");
+ return -EINVAL;
+ }
+
+ return xcc - 1;
+}
+
+static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_4_3_xcc_select_se_sh,
+ .read_wave_data = &gfx_v9_4_3_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
+ .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
+ .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+ .get_xccs_per_xcp = &gfx_v9_4_3_get_xccs_per_xcp,
+};
+
+static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
+ struct aca_bank *bank, enum aca_smu_type type,
+ void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ u32 instlo;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ /* NOTE: overwrite info.die_id with xcd id for gfx */
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+ info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+ switch (instlo) {
+ case mmSMNAID_XCD0_MCA_SMU:
+ case mmSMNAID_XCD1_MCA_SMU:
+ case mmSMNXCD_XCD0_MCA_SMU:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = {
+ .aca_bank_parser = gfx_v9_4_3_aca_bank_parser,
+ .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid,
+};
+
+static const struct aca_info gfx_v9_4_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
+ .bank_ops = &gfx_v9_4_3_aca_bank_ops,
+};
+
+static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
+{
+ adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;
+ adev->gfx.ras = &gfx_v9_4_3_ras;
+
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ adev->gfx.config.gb_addr_config = GOLDEN_GB_ADDR_CONFIG;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_BANKS);
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
+static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int xcc_id, int mec, int pipe, int queue)
+{
+ unsigned irq_type;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+ unsigned int hw_prio;
+ uint32_t xcc_doorbell_start;
+
+ ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
+ ring_id];
+
+ /* mec0 is me1 */
+ ring->xcc_id = xcc_id;
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range;
+ ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
+ (ring_id + xcc_id * adev->gfx.num_compute_rings) *
+ GFX9_MEC_HPD_SIZE;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ sprintf(ring->name, "comp_%d.%d.%d.%d",
+ ring->xcc_id, ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3);
+ uint32_t *ptr, num_xcc, inst;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ ptr = kcalloc(reg_count * num_xcc, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst * num_xcc, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+}
+
+static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id, xcc_id, num_xcc;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 153) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v9_4_3_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec;
+ k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(
+ adev, xcc_id, i, k, j))
+ continue;
+
+ r = gfx_v9_4_3_compute_ring_init(adev,
+ ring_id,
+ xcc_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+
+ /* create MQD for all compute queues as wel as KIQ for SRIOV case */
+ r = amdgpu_gfx_mqd_sw_init(adev,
+ sizeof(struct v9_mqd_allocation), xcc_id);
+ if (r)
+ return r;
+ }
+
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ if ((adev->gfx.mec_fw_version >= 155) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+ }
+ break;
+ case IP_VERSION(9, 5, 0):
+ if ((adev->gfx.mec_fw_version >= 21) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+ }
+ break;
+ default:
+ break;
+ }
+ r = gfx_v9_4_3_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ gfx_v9_4_3_alloc_ip_dump(adev);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i, num_xcc;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ for (i = 0; i < num_xcc; i++) {
+ amdgpu_gfx_mqd_sw_fini(adev, i);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
+ amdgpu_gfx_kiq_fini(adev, i);
+ }
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
+ gfx_v9_4_3_mec_fini(adev);
+ amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
+ gfx_v9_4_3_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+
+ return 0;
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ int i;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ /* CP and shaders */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, i, 0);
+ }
+}
+
+static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+/* For ASICs that needs xnack chain and MEC version supports, set SG_CONFIG1
+ * DISABLE_XNACK_CHECK_IN_RETRY_DISABLE bit and inform KFD to set xnack_chain
+ * bit in SET_RESOURCES
+ */
+static void gfx_v9_4_3_xcc_init_sq(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ if (!(adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ return;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1);
+ data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1);
+ WREG32_SOC15(GC, xcc_id, regSQ_CONFIG1, data);
+}
+
+static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ u32 tmp;
+ int i;
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ /* CP and shaders */
+ if (i == 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!adev->gmc.noretry);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_CONFIG, tmp);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_BASES, 0);
+ } else {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!adev->gmc.noretry);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_CONFIG, tmp);
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >>
+ 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >>
+ 48));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_BASES, tmp);
+ }
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0));
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id);
+ gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id);
+ gfx_v9_4_3_xcc_init_sq(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.db_debug2 =
+ RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ /* ToDo: GC 9.4.4 */
+ case IP_VERSION(9, 4, 3):
+ if (adev->gfx.mec_fw_version >= 184 &&
+ (amdgpu_sriov_reg_access_sq_config(adev) ||
+ !amdgpu_sriov_vf(adev)))
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ break;
+ case IP_VERSION(9, 5, 0):
+ if (adev->gfx.mec_fw_version >= 23)
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_constants_init(adev, i);
+}
+
+static void
+gfx_v9_4_3_xcc_enable_save_restore_machine(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1);
+}
+
+static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id)
+{
+ /*
+ * Rlc save restore list is workable since v2_1.
+ */
+ gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data);
+}
+
+static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_setting;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_setting = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL);
+ if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
+ return false;
+
+ return true;
+}
+
+static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t data;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
+}
+
+static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ int xcc_id, num_xcc;
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[GET_INST(GC, xcc_id)];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT);
+ }
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
+{
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ u32 i, j, k;
+ u32 mask;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff,
+ xcc_id);
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_CU_MASTER_BUSY) == 0)
+ break;
+ udelay(1);
+ }
+ if (k == adev->usec_timeout) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff,
+ 0xffffffff,
+ 0xffffffff, xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
+ i, j);
+ return;
+ }
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if ((RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v9_4_3_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ u32 tmp;
+
+ /* These interrupts should be enabled to drive DS clock */
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0);
+
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp);
+}
+
+static void gfx_v9_4_3_xcc_rlc_stop(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL,
+ RLC_ENABLE_F32, 0);
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+ gfx_v9_4_3_xcc_wait_for_rlc_serdes(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_rlc_stop(adev, i);
+}
+
+static void gfx_v9_4_3_xcc_rlc_reset(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET,
+ SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET,
+ SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_rlc_reset(adev, i);
+}
+
+static void gfx_v9_4_3_xcc_rlc_start(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL,
+ RLC_ENABLE_F32, 1);
+ udelay(50);
+
+ /* carrizo do enable cp interrupt after cp inited */
+ if (!(adev->flags & AMD_IS_APU)) {
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
+ udelay(50);
+ }
+}
+
+static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
+{
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ u32 rlc_ucode_ver;
+#endif
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ gfx_v9_4_3_xcc_rlc_start(adev, i);
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ /* RLC_GPM_GENERAL_6 : RLC Ucode version */
+ rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6);
+ if (rlc_ucode_ver == 0x108) {
+ dev_info(adev->dev,
+ "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
+ rlc_ucode_ver, adev->gfx.rlc_fw_version);
+ /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
+ * default is 0x9C4 to create a 100us interval */
+ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_TIMER_INT_3, 0x9C4);
+ /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
+ * to disable the page fault retry interrupts, default is
+ * 0x100 (256) */
+ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_12, 0x100);
+ }
+#endif
+ }
+}
+
+static int gfx_v9_4_3_xcc_rlc_load_microcode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+ for (i = 0; i < fw_size; i++) {
+ if (amdgpu_emu_mode == 1 && i % 100 == 0) {
+ dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i);
+ msleep(1);
+ }
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
+ }
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_rlc_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ int r;
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
+ /* legacy rlc firmware loading */
+ r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id);
+ if (r)
+ return r;
+ gfx_v9_4_3_xcc_rlc_start(adev, xcc_id);
+ }
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ /* disable CG */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
+ gfx_v9_4_3_xcc_init_pg(adev, xcc_id);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
+{
+ int r, i, num_xcc;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ r = gfx_v9_4_3_xcc_rlc_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ u32 reg, pre_data, data;
+
+ reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
+ else
+ pre_data = RREG32(reg);
+
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ }
+}
+
+static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
+ {SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)},
+ {SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)},
+};
+
+static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev,
+ uint32_t offset,
+ struct soc15_reg_rlcg *entries, int arr_size)
+{
+ int i, inst;
+ uint32_t reg;
+
+ if (!entries)
+ return false;
+
+ for (i = 0; i < arr_size; i++) {
+ const struct soc15_reg_rlcg *entry;
+
+ entry = &entries[i];
+ inst = adev->ip_map.logical_to_dev_inst ?
+ adev->ip_map.logical_to_dev_inst(
+ adev, entry->hwip, entry->instance) :
+ entry->instance;
+ reg = adev->reg_offset[entry->hwip][inst][entry->segment] +
+ entry->reg;
+ if (offset == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+{
+ return gfx_v9_4_3_check_rlcg_range(adev, offset,
+ (void *)rlcg_access_gc_9_4_3,
+ ARRAY_SIZE(rlcg_access_gc_9_4_3));
+}
+
+static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ if (enable) {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0);
+ } else {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL,
+ (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ adev->gfx.kiq[xcc_id].ring.sched.ready = false;
+ }
+ udelay(50);
+}
+
+static int gfx_v9_4_3_xcc_cp_compute_load_microcode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i;
+ u32 tmp;
+ u32 mec_ucode_addr_offset;
+ u32 mec_ucode_data_offset;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
+ adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ mec_ucode_addr_offset =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_ADDR);
+ mec_ucode_data_offset =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_DATA);
+
+ /* MEC1 */
+ WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset);
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32(mec_ucode_data_offset,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32(mec_ucode_addr_offset, adev->gfx.mec_fw_version);
+ /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
+
+ return 0;
+}
+
+/* KIQ functions */
+static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp | 0x80);
+}
+
+static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
+ mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ mqd->cp_hqd_queue_priority =
+ AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+ }
+}
+
+static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ mqd->dynamic_cu_mask_addr_lo =
+ lower_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+ mqd->dynamic_cu_mask_addr_hi =
+ upper_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ if (amdgpu_sriov_multi_vf_mode(adev))
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_MODE, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a queue/ring */
+ gfx_v9_4_3_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_quantum = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_QUANTUM);
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(
+ GC, GET_INST(GC, xcc_id),
+ regCP_MEC_DOORBELL_RANGE_LOWER,
+ ((adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+ 2) << 2);
+ WREG32_SOC15(
+ GC, GET_INST(GC, xcc_id),
+ regCP_MEC_DOORBELL_RANGE_UPPER,
+ ((adev->doorbell_index.userqueue_end +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+ 2) << 2);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int j;
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+
+ if (j == AMDGPU_MAX_USEC_TIMEOUT) {
+ DRM_DEBUG("%s dequeue request failed.\n", ring->name);
+
+ /* Manual disable if dequeue request times out */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ 0);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IQ_TIMER, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, CP_HQD_PERSISTENT_STATE_DEFAULT);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ struct v9_mqd *tmp_mqd;
+
+ gfx_v9_4_3_xcc_kiq_setting(ring, xcc_id);
+
+ /* GPU could be in bad state during probe, driver trigger the reset
+ * after load the SMU, in this case , the mqd is not be initialized.
+ * driver need to re-init the mqd.
+ * check mqd->cp_hqd_pq_control since this value should not be 0
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[xcc_id].mqd_backup;
+ if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
+ /* for GPU_RESET case , reset MQD to a clean status */
+ if (adev->gfx.kiq[xcc_id].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(struct v9_mqd_allocation));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_mqd_init(ring, xcc_id);
+ gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[xcc_id].mqd_backup)
+ memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id,
+ bool restore)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+ struct v9_mqd *tmp_mqd;
+
+ /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
+ * is not be initialized before
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
+
+ if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_mqd_init(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+}
+
+static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int j;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++) {
+ ring = &adev->gfx.compute_ring[j + xcc_id * adev->gfx.num_compute_rings];
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me,
+ ring->pipe,
+ ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_q_fini_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ gfx_v9_4_3_xcc_kiq_init_queue(&adev->gfx.kiq[xcc_id].ring, xcc_id);
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i + xcc_id *
+ adev->gfx.num_compute_rings];
+
+ gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false);
+ }
+
+ return amdgpu_gfx_enable_kcq(adev, xcc_id);
+}
+
+static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int r, j;
+
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ gfx_v9_4_3_xcc_disable_gpa_mode(adev, xcc_id);
+
+ r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id);
+ if (r)
+ return r;
+ } else {
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
+ }
+
+ r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_xcc_kcq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++) {
+ ring = &adev->gfx.compute_ring
+ [j + xcc_id * adev->gfx.num_compute_rings];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
+{
+ int r = 0, i, num_xcc, num_xcp, num_xcc_per_xcp;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (amdgpu_sriov_vf(adev)) {
+ enum amdgpu_gfx_partition mode;
+
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+ if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ return -EINVAL;
+ num_xcc_per_xcp = gfx_v9_4_3_get_xccs_per_xcp(adev);
+ adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp;
+ num_xcp = num_xcc / num_xcc_per_xcp;
+ r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
+
+ } else {
+ if (adev->in_suspend)
+ amdgpu_xcp_restore_partition_mode(adev->xcp_mgr);
+ else if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE) ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ r = amdgpu_xcp_switch_partition_mode(
+ adev->xcp_mgr, amdgpu_user_partt_mode);
+ }
+ if (r)
+ return r;
+
+ for (i = 0; i < num_xcc; i++) {
+ r = gfx_v9_4_3_xcc_cp_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
+{
+ if (amdgpu_gfx_disable_kcq(adev, xcc_id))
+ DRM_ERROR("XCD %d KCQ disable failed\n", xcc_id);
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* must disable polling for SRIOV when hw finished, otherwise
+ * CPC engine may still keep fetching WB address which is already
+ * invalid after sw finished and trigger DMAR reading error in
+ * hypervisor side.
+ */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
+ return;
+ }
+
+ /* Use deinitialize sequence from CAIL when unbinding device
+ * from driver, otherwise KIQ is hanging when binding back
+ */
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, adev->gfx.kiq[xcc_id].ring.me,
+ adev->gfx.kiq[xcc_id].ring.pipe,
+ adev->gfx.kiq[xcc_id].ring.queue, 0,
+ GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_q_fini_register(&adev->gfx.kiq[xcc_id].ring,
+ xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
+}
+
+static int gfx_v9_4_3_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
+ if (!amdgpu_sriov_vf(adev))
+ gfx_v9_4_3_init_golden_registers(adev);
+
+ gfx_v9_4_3_constants_init(adev);
+
+ r = adev->gfx.rlc.funcs->resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int gfx_v9_4_3_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, num_xcc;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ gfx_v9_4_3_xcc_fini(adev, i);
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v9_4_3_hw_fini(ip_block);
+}
+
+static int gfx_v9_4_3_resume(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v9_4_3_hw_init(ip_block);
+}
+
+static bool gfx_v9_4_3_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i), regGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ }
+ return true;
+}
+
+static int gfx_v9_4_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gfx_v9_4_3_is_idle(ip_block))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v9_4_3_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
+ GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
+ GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
+ }
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS2);
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+
+
+ if (grbm_soft_reset) {
+ /* stop the rlc */
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0);
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v9_4_3_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ gfx_v9_4_3_set_kiq_pm4_funcs(adev);
+ gfx_v9_4_3_set_ring_funcs(adev);
+ gfx_v9_4_3_set_irq_funcs(adev);
+ gfx_v9_4_3_set_gds_init(adev);
+ gfx_v9_4_3_set_rlc_funcs(adev);
+
+ /* init rlcg reg access ctrl */
+ gfx_v9_4_3_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v9_4_3_init_microcode(adev);
+}
+
+static int gfx_v9_4_3_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ if (adev->gfx.ras &&
+ adev->gfx.ras->enable_watchdog_timer)
+ adev->gfx.ras->enable_watchdog_timer(adev);
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, data);
+
+}
+
+static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void
+gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t data, def;
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL);
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data);
+ }
+ }
+ } else {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in RLC */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data);
+ }
+ }
+
+}
+
+static void
+gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* CGCG Hysteresis: 400us */
+ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+
+ data = (0x2710
+ << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x33450100)*/
+ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x3345 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+ /* reset CGCG/CGLS bits */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+ }
+
+}
+
+static int gfx_v9_4_3_xcc_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+
+ if (enable) {
+ /* FGCG */
+ gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id);
+ gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable,
+ xcc_id);
+ /* === CGCG + CGLS === */
+ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable,
+ xcc_id);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable,
+ xcc_id);
+ /* === MGCG + MGLS === */
+ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable,
+ xcc_id);
+
+ /* FGCG */
+ gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id);
+ gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+
+ return 0;
+}
+
+static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
+ .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled,
+ .set_safe_mode = gfx_v9_4_3_xcc_set_safe_mode,
+ .unset_safe_mode = gfx_v9_4_3_xcc_unset_safe_mode,
+ .init = gfx_v9_4_3_rlc_init,
+ .resume = gfx_v9_4_3_rlc_resume,
+ .stop = gfx_v9_4_3_rlc_stop,
+ .reset = gfx_v9_4_3_rlc_reset,
+ .start = gfx_v9_4_3_rlc_start,
+ .update_spm_vmid = gfx_v9_4_3_update_spm_vmid,
+ .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
+};
+
+static int gfx_v9_4_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, num_xcc;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_update_gfx_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, i);
+
+ return 0;
+}
+
+static void gfx_v9_4_3_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL));
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+}
+
+static void gfx_v9_4_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v9_4_3_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v9_4_3_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+ EOP_TC_NC_ACTION_EN) :
+ (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EOP_TC_MD_ACTION_EN)) |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v9_4_3_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v9_4_3_wait_reg_mem(ring, usepfp, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ seq, 0xffffffff, 4);
+}
+
+static void gfx_v9_4_3_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static u64 gfx_v9_4_3_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
+}
+
+static u64 gfx_v9_4_3_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v9_4_3_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx9 now */
+ }
+}
+
+static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_4_3_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v9_4_3_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
+static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id);
+ WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id);
+}
+
+static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ struct amdgpu_device *adev, int me, int pipe,
+ enum amdgpu_interrupt_state state, int xcc_id)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
+ break;
+ default:
+ break;
+ }
+}
+
+static u32 gfx_v9_4_3_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int xcc_id, int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
+static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl_reg, mec_int_cntl;
+ int i, j, k, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < num_xcc; i++) {
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (j = 0; j < adev->gfx.mec.num_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ /* MECs start at 1 */
+ mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k);
+
+ if (mec_int_cntl_reg) {
+ mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ?
+ 1 : 0);
+ WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i);
+ }
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl_reg, mec_int_cntl;
+ int i, j, k, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < num_xcc; i++) {
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (j = 0; j < adev->gfx.mec.num_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ /* MECs start at 1 */
+ mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k);
+
+ if (mec_int_cntl_reg) {
+ mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ?
+ 1 : 0);
+ WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i);
+ }
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < num_xcc; i++)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ switch (type) {
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 0, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 1, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 2, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 3, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 0, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 1, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 2, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 3, state, i);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i, xcc_id;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+
+ DRM_DEBUG("IH: CP EOP\n");
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+
+ if (xcc_id == -EINVAL)
+ return -EINVAL;
+
+ switch (me_id) {
+ case 0:
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring
+ [i +
+ xcc_id * adev->gfx.num_compute_rings];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead of per queue.
+ */
+
+ if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v9_4_3_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i, xcc_id;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+
+ if (xcc_id == -EINVAL)
+ return;
+
+ switch (me_id) {
+ case 0:
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring
+ [i +
+ xcc_id * adev->gfx.num_compute_rings];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ }
+}
+
+static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v9_4_3_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+static void gfx_v9_4_3_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int cp_coher_cntl =
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+ /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
+static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring,
+ uint32_t pipe, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ uint32_t wcl_cs_reg;
+
+ /* regSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
+ val = enable ? 0x1 : 0x7f;
+
+ switch (pipe) {
+ case 0:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS0);
+ break;
+ case 1:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS1);
+ break;
+ case 2:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS2);
+ break;
+ case 3:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS3);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+
+ amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
+
+}
+static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ int i;
+
+ /* regSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
+ * number of gfx waves. Setting 5 bit will make sure gfx only gets
+ * around 25% of gpu resources.
+ */
+ val = enable ? 0x1f : 0x07ffffff;
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_GFX),
+ val);
+
+ /* Restrict waves for normal/low priority compute queues as well
+ * to get best QoS for high priority compute jobs.
+ *
+ * amdgpu controls only 1st ME(0-3 CS pipes).
+ */
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ if (i != ring->pipe)
+ gfx_v9_4_3_emit_wave_limit_cs(ring, i, enable);
+
+ }
+}
+
+static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me,
+ uint32_t pipe, uint32_t queue,
+ uint32_t xcc_id)
+{
+ int i, r;
+ /* make sure dequeue is complete*/
+ gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, me, pipe, queue, 0, GET_INST(GC, xcc_id));
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ else
+ r = 0;
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id);
+
+ return r;
+
+}
+
+static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev)
+{
+ if (!!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE))
+ return true;
+ else
+ dev_warn_once(adev->dev, "Please use the latest MEC version to see whether support pipe reset\n");
+
+ return false;
+}
+
+static int gfx_v9_4_3_reset_hw_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe, clean_pipe;
+ int r;
+
+ if (!gfx_v9_4_3_pipe_reset_support(adev))
+ return -EINVAL;
+
+ gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+
+ reset_pipe = RREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (ring->me == 1) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 1);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 1);
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (ring->pipe)
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 1);
+ else
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 1);
+ }
+
+ WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, clean_pipe);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id);
+
+ r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
+ return r;
+}
+
+static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE;
+ unsigned long flags;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ dev_err(adev->dev, "kiq ring test failed after ring: %s queue reset\n",
+ ring->name);
+ goto pipe_reset;
+ }
+
+ r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
+ if (r)
+ dev_err(adev->dev, "fail to wait on hqd deactive and will try pipe reset\n");
+
+pipe_reset:
+ if (r) {
+ if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE))
+ return -EOPNOTSUPP;
+ r = gfx_v9_4_3_reset_hw_pipe(ring);
+ reset_mode = AMDGPU_RESET_TYPE_PER_PIPE;
+ dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name,
+ r ? "failed" : "successfully");
+ if (r)
+ return r;
+ }
+
+ gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r) {
+ if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE)
+ goto pipe_reset;
+
+ dev_err(adev->dev, "fail to remap queue\n");
+ return r;
+ }
+
+ if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) {
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ goto pipe_reset;
+ }
+
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+enum amdgpu_gfx_cp_ras_mem_id {
+ AMDGPU_GFX_CP_MEM1 = 1,
+ AMDGPU_GFX_CP_MEM2,
+ AMDGPU_GFX_CP_MEM3,
+ AMDGPU_GFX_CP_MEM4,
+ AMDGPU_GFX_CP_MEM5,
+};
+
+enum amdgpu_gfx_gcea_ras_mem_id {
+ AMDGPU_GFX_GCEA_IOWR_CMDMEM = 4,
+ AMDGPU_GFX_GCEA_IORD_CMDMEM,
+ AMDGPU_GFX_GCEA_GMIWR_CMDMEM,
+ AMDGPU_GFX_GCEA_GMIRD_CMDMEM,
+ AMDGPU_GFX_GCEA_DRAMWR_CMDMEM,
+ AMDGPU_GFX_GCEA_DRAMRD_CMDMEM,
+ AMDGPU_GFX_GCEA_MAM_DMEM0,
+ AMDGPU_GFX_GCEA_MAM_DMEM1,
+ AMDGPU_GFX_GCEA_MAM_DMEM2,
+ AMDGPU_GFX_GCEA_MAM_DMEM3,
+ AMDGPU_GFX_GCEA_MAM_AMEM0,
+ AMDGPU_GFX_GCEA_MAM_AMEM1,
+ AMDGPU_GFX_GCEA_MAM_AMEM2,
+ AMDGPU_GFX_GCEA_MAM_AMEM3,
+ AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER,
+ AMDGPU_GFX_GCEA_WRET_TAGMEM,
+ AMDGPU_GFX_GCEA_RRET_TAGMEM,
+ AMDGPU_GFX_GCEA_IOWR_DATAMEM,
+ AMDGPU_GFX_GCEA_GMIWR_DATAMEM,
+ AMDGPU_GFX_GCEA_DRAM_DATAMEM,
+};
+
+enum amdgpu_gfx_gc_cane_ras_mem_id {
+ AMDGPU_GFX_GC_CANE_MEM0 = 0,
+};
+
+enum amdgpu_gfx_gcutcl2_ras_mem_id {
+ AMDGPU_GFX_GCUTCL2_MEM2P512X95 = 160,
+};
+
+enum amdgpu_gfx_gds_ras_mem_id {
+ AMDGPU_GFX_GDS_MEM0 = 0,
+};
+
+enum amdgpu_gfx_lds_ras_mem_id {
+ AMDGPU_GFX_LDS_BANK0 = 0,
+ AMDGPU_GFX_LDS_BANK1,
+ AMDGPU_GFX_LDS_BANK2,
+ AMDGPU_GFX_LDS_BANK3,
+ AMDGPU_GFX_LDS_BANK4,
+ AMDGPU_GFX_LDS_BANK5,
+ AMDGPU_GFX_LDS_BANK6,
+ AMDGPU_GFX_LDS_BANK7,
+ AMDGPU_GFX_LDS_BANK8,
+ AMDGPU_GFX_LDS_BANK9,
+ AMDGPU_GFX_LDS_BANK10,
+ AMDGPU_GFX_LDS_BANK11,
+ AMDGPU_GFX_LDS_BANK12,
+ AMDGPU_GFX_LDS_BANK13,
+ AMDGPU_GFX_LDS_BANK14,
+ AMDGPU_GFX_LDS_BANK15,
+ AMDGPU_GFX_LDS_BANK16,
+ AMDGPU_GFX_LDS_BANK17,
+ AMDGPU_GFX_LDS_BANK18,
+ AMDGPU_GFX_LDS_BANK19,
+ AMDGPU_GFX_LDS_BANK20,
+ AMDGPU_GFX_LDS_BANK21,
+ AMDGPU_GFX_LDS_BANK22,
+ AMDGPU_GFX_LDS_BANK23,
+ AMDGPU_GFX_LDS_BANK24,
+ AMDGPU_GFX_LDS_BANK25,
+ AMDGPU_GFX_LDS_BANK26,
+ AMDGPU_GFX_LDS_BANK27,
+ AMDGPU_GFX_LDS_BANK28,
+ AMDGPU_GFX_LDS_BANK29,
+ AMDGPU_GFX_LDS_BANK30,
+ AMDGPU_GFX_LDS_BANK31,
+ AMDGPU_GFX_LDS_SP_BUFFER_A,
+ AMDGPU_GFX_LDS_SP_BUFFER_B,
+};
+
+enum amdgpu_gfx_rlc_ras_mem_id {
+ AMDGPU_GFX_RLC_GPMF32 = 1,
+ AMDGPU_GFX_RLC_RLCVF32,
+ AMDGPU_GFX_RLC_SCRATCH,
+ AMDGPU_GFX_RLC_SRM_ARAM,
+ AMDGPU_GFX_RLC_SRM_DRAM,
+ AMDGPU_GFX_RLC_TCTAG,
+ AMDGPU_GFX_RLC_SPM_SE,
+ AMDGPU_GFX_RLC_SPM_GRBMT,
+};
+
+enum amdgpu_gfx_sp_ras_mem_id {
+ AMDGPU_GFX_SP_SIMDID0 = 0,
+};
+
+enum amdgpu_gfx_spi_ras_mem_id {
+ AMDGPU_GFX_SPI_MEM0 = 0,
+ AMDGPU_GFX_SPI_MEM1,
+ AMDGPU_GFX_SPI_MEM2,
+ AMDGPU_GFX_SPI_MEM3,
+};
+
+enum amdgpu_gfx_sqc_ras_mem_id {
+ AMDGPU_GFX_SQC_INST_CACHE_A = 100,
+ AMDGPU_GFX_SQC_INST_CACHE_B = 101,
+ AMDGPU_GFX_SQC_INST_CACHE_TAG_A = 102,
+ AMDGPU_GFX_SQC_INST_CACHE_TAG_B = 103,
+ AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A = 104,
+ AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B = 105,
+ AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A = 106,
+ AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B = 107,
+ AMDGPU_GFX_SQC_DATA_CACHE_A = 200,
+ AMDGPU_GFX_SQC_DATA_CACHE_B = 201,
+ AMDGPU_GFX_SQC_DATA_CACHE_TAG_A = 202,
+ AMDGPU_GFX_SQC_DATA_CACHE_TAG_B = 203,
+ AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A = 204,
+ AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B = 205,
+ AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A = 206,
+ AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B = 207,
+ AMDGPU_GFX_SQC_DIRTY_BIT_A = 208,
+ AMDGPU_GFX_SQC_DIRTY_BIT_B = 209,
+ AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0 = 210,
+ AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1 = 211,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A = 212,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B = 213,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE = 108,
+};
+
+enum amdgpu_gfx_sq_ras_mem_id {
+ AMDGPU_GFX_SQ_SGPR_MEM0 = 0,
+ AMDGPU_GFX_SQ_SGPR_MEM1,
+ AMDGPU_GFX_SQ_SGPR_MEM2,
+ AMDGPU_GFX_SQ_SGPR_MEM3,
+};
+
+enum amdgpu_gfx_ta_ras_mem_id {
+ AMDGPU_GFX_TA_FS_AFIFO_RAM_LO = 1,
+ AMDGPU_GFX_TA_FS_AFIFO_RAM_HI,
+ AMDGPU_GFX_TA_FS_CFIFO_RAM,
+ AMDGPU_GFX_TA_FSX_LFIFO,
+ AMDGPU_GFX_TA_FS_DFIFO_RAM,
+};
+
+enum amdgpu_gfx_tcc_ras_mem_id {
+ AMDGPU_GFX_TCC_MEM1 = 1,
+};
+
+enum amdgpu_gfx_tca_ras_mem_id {
+ AMDGPU_GFX_TCA_MEM1 = 1,
+};
+
+enum amdgpu_gfx_tci_ras_mem_id {
+ AMDGPU_GFX_TCIW_MEM = 1,
+};
+
+enum amdgpu_gfx_tcp_ras_mem_id {
+ AMDGPU_GFX_TCP_LFIFO0 = 1,
+ AMDGPU_GFX_TCP_SET0BANK0_RAM,
+ AMDGPU_GFX_TCP_SET0BANK1_RAM,
+ AMDGPU_GFX_TCP_SET0BANK2_RAM,
+ AMDGPU_GFX_TCP_SET0BANK3_RAM,
+ AMDGPU_GFX_TCP_SET1BANK0_RAM,
+ AMDGPU_GFX_TCP_SET1BANK1_RAM,
+ AMDGPU_GFX_TCP_SET1BANK2_RAM,
+ AMDGPU_GFX_TCP_SET1BANK3_RAM,
+ AMDGPU_GFX_TCP_SET2BANK0_RAM,
+ AMDGPU_GFX_TCP_SET2BANK1_RAM,
+ AMDGPU_GFX_TCP_SET2BANK2_RAM,
+ AMDGPU_GFX_TCP_SET2BANK3_RAM,
+ AMDGPU_GFX_TCP_SET3BANK0_RAM,
+ AMDGPU_GFX_TCP_SET3BANK1_RAM,
+ AMDGPU_GFX_TCP_SET3BANK2_RAM,
+ AMDGPU_GFX_TCP_SET3BANK3_RAM,
+ AMDGPU_GFX_TCP_VM_FIFO,
+ AMDGPU_GFX_TCP_DB_TAGRAM0,
+ AMDGPU_GFX_TCP_DB_TAGRAM1,
+ AMDGPU_GFX_TCP_DB_TAGRAM2,
+ AMDGPU_GFX_TCP_DB_TAGRAM3,
+ AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0,
+ AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1,
+ AMDGPU_GFX_TCP_CMD_FIFO,
+};
+
+enum amdgpu_gfx_td_ras_mem_id {
+ AMDGPU_GFX_TD_UTD_CS_FIFO_MEM = 1,
+ AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM,
+ AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM,
+};
+
+enum amdgpu_gfx_tcx_ras_mem_id {
+ AMDGPU_GFX_TCX_FIFOD0 = 0,
+ AMDGPU_GFX_TCX_FIFOD1,
+ AMDGPU_GFX_TCX_FIFOD2,
+ AMDGPU_GFX_TCX_FIFOD3,
+ AMDGPU_GFX_TCX_FIFOD4,
+ AMDGPU_GFX_TCX_FIFOD5,
+ AMDGPU_GFX_TCX_FIFOD6,
+ AMDGPU_GFX_TCX_FIFOD7,
+ AMDGPU_GFX_TCX_FIFOB0,
+ AMDGPU_GFX_TCX_FIFOB1,
+ AMDGPU_GFX_TCX_FIFOB2,
+ AMDGPU_GFX_TCX_FIFOB3,
+ AMDGPU_GFX_TCX_FIFOB4,
+ AMDGPU_GFX_TCX_FIFOB5,
+ AMDGPU_GFX_TCX_FIFOB6,
+ AMDGPU_GFX_TCX_FIFOB7,
+ AMDGPU_GFX_TCX_FIFOA0,
+ AMDGPU_GFX_TCX_FIFOA1,
+ AMDGPU_GFX_TCX_FIFOA2,
+ AMDGPU_GFX_TCX_FIFOA3,
+ AMDGPU_GFX_TCX_FIFOA4,
+ AMDGPU_GFX_TCX_FIFOA5,
+ AMDGPU_GFX_TCX_FIFOA6,
+ AMDGPU_GFX_TCX_FIFOA7,
+ AMDGPU_GFX_TCX_CFIFO0,
+ AMDGPU_GFX_TCX_CFIFO1,
+ AMDGPU_GFX_TCX_CFIFO2,
+ AMDGPU_GFX_TCX_CFIFO3,
+ AMDGPU_GFX_TCX_CFIFO4,
+ AMDGPU_GFX_TCX_CFIFO5,
+ AMDGPU_GFX_TCX_CFIFO6,
+ AMDGPU_GFX_TCX_CFIFO7,
+ AMDGPU_GFX_TCX_FIFO_ACKB0,
+ AMDGPU_GFX_TCX_FIFO_ACKB1,
+ AMDGPU_GFX_TCX_FIFO_ACKB2,
+ AMDGPU_GFX_TCX_FIFO_ACKB3,
+ AMDGPU_GFX_TCX_FIFO_ACKB4,
+ AMDGPU_GFX_TCX_FIFO_ACKB5,
+ AMDGPU_GFX_TCX_FIFO_ACKB6,
+ AMDGPU_GFX_TCX_FIFO_ACKB7,
+ AMDGPU_GFX_TCX_FIFO_ACKD0,
+ AMDGPU_GFX_TCX_FIFO_ACKD1,
+ AMDGPU_GFX_TCX_FIFO_ACKD2,
+ AMDGPU_GFX_TCX_FIFO_ACKD3,
+ AMDGPU_GFX_TCX_FIFO_ACKD4,
+ AMDGPU_GFX_TCX_FIFO_ACKD5,
+ AMDGPU_GFX_TCX_FIFO_ACKD6,
+ AMDGPU_GFX_TCX_FIFO_ACKD7,
+ AMDGPU_GFX_TCX_DST_FIFOA0,
+ AMDGPU_GFX_TCX_DST_FIFOA1,
+ AMDGPU_GFX_TCX_DST_FIFOA2,
+ AMDGPU_GFX_TCX_DST_FIFOA3,
+ AMDGPU_GFX_TCX_DST_FIFOA4,
+ AMDGPU_GFX_TCX_DST_FIFOA5,
+ AMDGPU_GFX_TCX_DST_FIFOA6,
+ AMDGPU_GFX_TCX_DST_FIFOA7,
+ AMDGPU_GFX_TCX_DST_FIFOB0,
+ AMDGPU_GFX_TCX_DST_FIFOB1,
+ AMDGPU_GFX_TCX_DST_FIFOB2,
+ AMDGPU_GFX_TCX_DST_FIFOB3,
+ AMDGPU_GFX_TCX_DST_FIFOB4,
+ AMDGPU_GFX_TCX_DST_FIFOB5,
+ AMDGPU_GFX_TCX_DST_FIFOB6,
+ AMDGPU_GFX_TCX_DST_FIFOB7,
+ AMDGPU_GFX_TCX_DST_FIFOD0,
+ AMDGPU_GFX_TCX_DST_FIFOD1,
+ AMDGPU_GFX_TCX_DST_FIFOD2,
+ AMDGPU_GFX_TCX_DST_FIFOD3,
+ AMDGPU_GFX_TCX_DST_FIFOD4,
+ AMDGPU_GFX_TCX_DST_FIFOD5,
+ AMDGPU_GFX_TCX_DST_FIFOD6,
+ AMDGPU_GFX_TCX_DST_FIFOD7,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB0,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB1,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB2,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB3,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB4,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB5,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB6,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB7,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD0,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD1,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD2,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD3,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD4,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD5,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD6,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD7,
+};
+
+enum amdgpu_gfx_atc_l2_ras_mem_id {
+ AMDGPU_GFX_ATC_L2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_utcl2_ras_mem_id {
+ AMDGPU_GFX_UTCL2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_vml2_ras_mem_id {
+ AMDGPU_GFX_VML2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_vml2_walker_ras_mem_id {
+ AMDGPU_GFX_VML2_WALKER_MEM0 = 0,
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_cp_mem_list[] = {
+ {AMDGPU_GFX_CP_MEM1, "CP_MEM1"},
+ {AMDGPU_GFX_CP_MEM2, "CP_MEM2"},
+ {AMDGPU_GFX_CP_MEM3, "CP_MEM3"},
+ {AMDGPU_GFX_CP_MEM4, "CP_MEM4"},
+ {AMDGPU_GFX_CP_MEM5, "CP_MEM5"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcea_mem_list[] = {
+ {AMDGPU_GFX_GCEA_IOWR_CMDMEM, "GCEA_IOWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_IORD_CMDMEM, "GCEA_IORD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_GMIWR_CMDMEM, "GCEA_GMIWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_GMIRD_CMDMEM, "GCEA_GMIRD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_DRAMWR_CMDMEM, "GCEA_DRAMWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_DRAMRD_CMDMEM, "GCEA_DRAMRD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM0, "GCEA_MAM_DMEM0"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM1, "GCEA_MAM_DMEM1"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM2, "GCEA_MAM_DMEM2"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM3, "GCEA_MAM_DMEM3"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM0, "GCEA_MAM_AMEM0"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM1, "GCEA_MAM_AMEM1"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM2, "GCEA_MAM_AMEM2"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM3, "GCEA_MAM_AMEM3"},
+ {AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER, "GCEA_MAM_AFLUSH_BUFFER"},
+ {AMDGPU_GFX_GCEA_WRET_TAGMEM, "GCEA_WRET_TAGMEM"},
+ {AMDGPU_GFX_GCEA_RRET_TAGMEM, "GCEA_RRET_TAGMEM"},
+ {AMDGPU_GFX_GCEA_IOWR_DATAMEM, "GCEA_IOWR_DATAMEM"},
+ {AMDGPU_GFX_GCEA_GMIWR_DATAMEM, "GCEA_GMIWR_DATAMEM"},
+ {AMDGPU_GFX_GCEA_DRAM_DATAMEM, "GCEA_DRAM_DATAMEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gc_cane_mem_list[] = {
+ {AMDGPU_GFX_GC_CANE_MEM0, "GC_CANE_MEM0"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcutcl2_mem_list[] = {
+ {AMDGPU_GFX_GCUTCL2_MEM2P512X95, "GCUTCL2_MEM2P512X95"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gds_mem_list[] = {
+ {AMDGPU_GFX_GDS_MEM0, "GDS_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_lds_mem_list[] = {
+ {AMDGPU_GFX_LDS_BANK0, "LDS_BANK0"},
+ {AMDGPU_GFX_LDS_BANK1, "LDS_BANK1"},
+ {AMDGPU_GFX_LDS_BANK2, "LDS_BANK2"},
+ {AMDGPU_GFX_LDS_BANK3, "LDS_BANK3"},
+ {AMDGPU_GFX_LDS_BANK4, "LDS_BANK4"},
+ {AMDGPU_GFX_LDS_BANK5, "LDS_BANK5"},
+ {AMDGPU_GFX_LDS_BANK6, "LDS_BANK6"},
+ {AMDGPU_GFX_LDS_BANK7, "LDS_BANK7"},
+ {AMDGPU_GFX_LDS_BANK8, "LDS_BANK8"},
+ {AMDGPU_GFX_LDS_BANK9, "LDS_BANK9"},
+ {AMDGPU_GFX_LDS_BANK10, "LDS_BANK10"},
+ {AMDGPU_GFX_LDS_BANK11, "LDS_BANK11"},
+ {AMDGPU_GFX_LDS_BANK12, "LDS_BANK12"},
+ {AMDGPU_GFX_LDS_BANK13, "LDS_BANK13"},
+ {AMDGPU_GFX_LDS_BANK14, "LDS_BANK14"},
+ {AMDGPU_GFX_LDS_BANK15, "LDS_BANK15"},
+ {AMDGPU_GFX_LDS_BANK16, "LDS_BANK16"},
+ {AMDGPU_GFX_LDS_BANK17, "LDS_BANK17"},
+ {AMDGPU_GFX_LDS_BANK18, "LDS_BANK18"},
+ {AMDGPU_GFX_LDS_BANK19, "LDS_BANK19"},
+ {AMDGPU_GFX_LDS_BANK20, "LDS_BANK20"},
+ {AMDGPU_GFX_LDS_BANK21, "LDS_BANK21"},
+ {AMDGPU_GFX_LDS_BANK22, "LDS_BANK22"},
+ {AMDGPU_GFX_LDS_BANK23, "LDS_BANK23"},
+ {AMDGPU_GFX_LDS_BANK24, "LDS_BANK24"},
+ {AMDGPU_GFX_LDS_BANK25, "LDS_BANK25"},
+ {AMDGPU_GFX_LDS_BANK26, "LDS_BANK26"},
+ {AMDGPU_GFX_LDS_BANK27, "LDS_BANK27"},
+ {AMDGPU_GFX_LDS_BANK28, "LDS_BANK28"},
+ {AMDGPU_GFX_LDS_BANK29, "LDS_BANK29"},
+ {AMDGPU_GFX_LDS_BANK30, "LDS_BANK30"},
+ {AMDGPU_GFX_LDS_BANK31, "LDS_BANK31"},
+ {AMDGPU_GFX_LDS_SP_BUFFER_A, "LDS_SP_BUFFER_A"},
+ {AMDGPU_GFX_LDS_SP_BUFFER_B, "LDS_SP_BUFFER_B"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_rlc_mem_list[] = {
+ {AMDGPU_GFX_RLC_GPMF32, "RLC_GPMF32"},
+ {AMDGPU_GFX_RLC_RLCVF32, "RLC_RLCVF32"},
+ {AMDGPU_GFX_RLC_SCRATCH, "RLC_SCRATCH"},
+ {AMDGPU_GFX_RLC_SRM_ARAM, "RLC_SRM_ARAM"},
+ {AMDGPU_GFX_RLC_SRM_DRAM, "RLC_SRM_DRAM"},
+ {AMDGPU_GFX_RLC_TCTAG, "RLC_TCTAG"},
+ {AMDGPU_GFX_RLC_SPM_SE, "RLC_SPM_SE"},
+ {AMDGPU_GFX_RLC_SPM_GRBMT, "RLC_SPM_GRBMT"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sp_mem_list[] = {
+ {AMDGPU_GFX_SP_SIMDID0, "SP_SIMDID0"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_spi_mem_list[] = {
+ {AMDGPU_GFX_SPI_MEM0, "SPI_MEM0"},
+ {AMDGPU_GFX_SPI_MEM1, "SPI_MEM1"},
+ {AMDGPU_GFX_SPI_MEM2, "SPI_MEM2"},
+ {AMDGPU_GFX_SPI_MEM3, "SPI_MEM3"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sqc_mem_list[] = {
+ {AMDGPU_GFX_SQC_INST_CACHE_A, "SQC_INST_CACHE_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_B, "SQC_INST_CACHE_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_TAG_A, "SQC_INST_CACHE_TAG_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_TAG_B, "SQC_INST_CACHE_TAG_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A, "SQC_INST_CACHE_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B, "SQC_INST_CACHE_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A, "SQC_INST_CACHE_GATCL1_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B, "SQC_INST_CACHE_GATCL1_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_A, "SQC_DATA_CACHE_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_B, "SQC_DATA_CACHE_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_TAG_A, "SQC_DATA_CACHE_TAG_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_TAG_B, "SQC_DATA_CACHE_TAG_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A, "SQC_DATA_CACHE_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B, "SQC_DATA_CACHE_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A, "SQC_DATA_CACHE_HIT_FIFO_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B, "SQC_DATA_CACHE_HIT_FIFO_B"},
+ {AMDGPU_GFX_SQC_DIRTY_BIT_A, "SQC_DIRTY_BIT_A"},
+ {AMDGPU_GFX_SQC_DIRTY_BIT_B, "SQC_DIRTY_BIT_B"},
+ {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0, "SQC_WRITE_DATA_BUFFER_CU0"},
+ {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1, "SQC_WRITE_DATA_BUFFER_CU1"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE, "SQC_UTCL1_MISS_LFIFO_INST_CACHE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sq_mem_list[] = {
+ {AMDGPU_GFX_SQ_SGPR_MEM0, "SQ_SGPR_MEM0"},
+ {AMDGPU_GFX_SQ_SGPR_MEM1, "SQ_SGPR_MEM1"},
+ {AMDGPU_GFX_SQ_SGPR_MEM2, "SQ_SGPR_MEM2"},
+ {AMDGPU_GFX_SQ_SGPR_MEM3, "SQ_SGPR_MEM3"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_ta_mem_list[] = {
+ {AMDGPU_GFX_TA_FS_AFIFO_RAM_LO, "TA_FS_AFIFO_RAM_LO"},
+ {AMDGPU_GFX_TA_FS_AFIFO_RAM_HI, "TA_FS_AFIFO_RAM_HI"},
+ {AMDGPU_GFX_TA_FS_CFIFO_RAM, "TA_FS_CFIFO_RAM"},
+ {AMDGPU_GFX_TA_FSX_LFIFO, "TA_FSX_LFIFO"},
+ {AMDGPU_GFX_TA_FS_DFIFO_RAM, "TA_FS_DFIFO_RAM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcc_mem_list[] = {
+ {AMDGPU_GFX_TCC_MEM1, "TCC_MEM1"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tca_mem_list[] = {
+ {AMDGPU_GFX_TCA_MEM1, "TCA_MEM1"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tci_mem_list[] = {
+ {AMDGPU_GFX_TCIW_MEM, "TCIW_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcp_mem_list[] = {
+ {AMDGPU_GFX_TCP_LFIFO0, "TCP_LFIFO0"},
+ {AMDGPU_GFX_TCP_SET0BANK0_RAM, "TCP_SET0BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK1_RAM, "TCP_SET0BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK2_RAM, "TCP_SET0BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK3_RAM, "TCP_SET0BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK0_RAM, "TCP_SET1BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK1_RAM, "TCP_SET1BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK2_RAM, "TCP_SET1BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK3_RAM, "TCP_SET1BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK0_RAM, "TCP_SET2BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK1_RAM, "TCP_SET2BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK2_RAM, "TCP_SET2BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK3_RAM, "TCP_SET2BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK0_RAM, "TCP_SET3BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK1_RAM, "TCP_SET3BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK2_RAM, "TCP_SET3BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK3_RAM, "TCP_SET3BANK3_RAM"},
+ {AMDGPU_GFX_TCP_VM_FIFO, "TCP_VM_FIFO"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM0, "TCP_DB_TAGRAM0"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM1, "TCP_DB_TAGRAM1"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM2, "TCP_DB_TAGRAM2"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM3, "TCP_DB_TAGRAM3"},
+ {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0, "TCP_UTCL1_LFIFO_PROBE0"},
+ {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1, "TCP_UTCL1_LFIFO_PROBE1"},
+ {AMDGPU_GFX_TCP_CMD_FIFO, "TCP_CMD_FIFO"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_td_mem_list[] = {
+ {AMDGPU_GFX_TD_UTD_CS_FIFO_MEM, "TD_UTD_CS_FIFO_MEM"},
+ {AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM, "TD_UTD_SS_FIFO_LO_MEM"},
+ {AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM, "TD_UTD_SS_FIFO_HI_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcx_mem_list[] = {
+ {AMDGPU_GFX_TCX_FIFOD0, "TCX_FIFOD0"},
+ {AMDGPU_GFX_TCX_FIFOD1, "TCX_FIFOD1"},
+ {AMDGPU_GFX_TCX_FIFOD2, "TCX_FIFOD2"},
+ {AMDGPU_GFX_TCX_FIFOD3, "TCX_FIFOD3"},
+ {AMDGPU_GFX_TCX_FIFOD4, "TCX_FIFOD4"},
+ {AMDGPU_GFX_TCX_FIFOD5, "TCX_FIFOD5"},
+ {AMDGPU_GFX_TCX_FIFOD6, "TCX_FIFOD6"},
+ {AMDGPU_GFX_TCX_FIFOD7, "TCX_FIFOD7"},
+ {AMDGPU_GFX_TCX_FIFOB0, "TCX_FIFOB0"},
+ {AMDGPU_GFX_TCX_FIFOB1, "TCX_FIFOB1"},
+ {AMDGPU_GFX_TCX_FIFOB2, "TCX_FIFOB2"},
+ {AMDGPU_GFX_TCX_FIFOB3, "TCX_FIFOB3"},
+ {AMDGPU_GFX_TCX_FIFOB4, "TCX_FIFOB4"},
+ {AMDGPU_GFX_TCX_FIFOB5, "TCX_FIFOB5"},
+ {AMDGPU_GFX_TCX_FIFOB6, "TCX_FIFOB6"},
+ {AMDGPU_GFX_TCX_FIFOB7, "TCX_FIFOB7"},
+ {AMDGPU_GFX_TCX_FIFOA0, "TCX_FIFOA0"},
+ {AMDGPU_GFX_TCX_FIFOA1, "TCX_FIFOA1"},
+ {AMDGPU_GFX_TCX_FIFOA2, "TCX_FIFOA2"},
+ {AMDGPU_GFX_TCX_FIFOA3, "TCX_FIFOA3"},
+ {AMDGPU_GFX_TCX_FIFOA4, "TCX_FIFOA4"},
+ {AMDGPU_GFX_TCX_FIFOA5, "TCX_FIFOA5"},
+ {AMDGPU_GFX_TCX_FIFOA6, "TCX_FIFOA6"},
+ {AMDGPU_GFX_TCX_FIFOA7, "TCX_FIFOA7"},
+ {AMDGPU_GFX_TCX_CFIFO0, "TCX_CFIFO0"},
+ {AMDGPU_GFX_TCX_CFIFO1, "TCX_CFIFO1"},
+ {AMDGPU_GFX_TCX_CFIFO2, "TCX_CFIFO2"},
+ {AMDGPU_GFX_TCX_CFIFO3, "TCX_CFIFO3"},
+ {AMDGPU_GFX_TCX_CFIFO4, "TCX_CFIFO4"},
+ {AMDGPU_GFX_TCX_CFIFO5, "TCX_CFIFO5"},
+ {AMDGPU_GFX_TCX_CFIFO6, "TCX_CFIFO6"},
+ {AMDGPU_GFX_TCX_CFIFO7, "TCX_CFIFO7"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB0, "TCX_FIFO_ACKB0"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB1, "TCX_FIFO_ACKB1"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB2, "TCX_FIFO_ACKB2"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB3, "TCX_FIFO_ACKB3"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB4, "TCX_FIFO_ACKB4"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB5, "TCX_FIFO_ACKB5"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB6, "TCX_FIFO_ACKB6"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB7, "TCX_FIFO_ACKB7"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD0, "TCX_FIFO_ACKD0"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD1, "TCX_FIFO_ACKD1"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD2, "TCX_FIFO_ACKD2"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD3, "TCX_FIFO_ACKD3"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD4, "TCX_FIFO_ACKD4"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD5, "TCX_FIFO_ACKD5"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD6, "TCX_FIFO_ACKD6"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD7, "TCX_FIFO_ACKD7"},
+ {AMDGPU_GFX_TCX_DST_FIFOA0, "TCX_DST_FIFOA0"},
+ {AMDGPU_GFX_TCX_DST_FIFOA1, "TCX_DST_FIFOA1"},
+ {AMDGPU_GFX_TCX_DST_FIFOA2, "TCX_DST_FIFOA2"},
+ {AMDGPU_GFX_TCX_DST_FIFOA3, "TCX_DST_FIFOA3"},
+ {AMDGPU_GFX_TCX_DST_FIFOA4, "TCX_DST_FIFOA4"},
+ {AMDGPU_GFX_TCX_DST_FIFOA5, "TCX_DST_FIFOA5"},
+ {AMDGPU_GFX_TCX_DST_FIFOA6, "TCX_DST_FIFOA6"},
+ {AMDGPU_GFX_TCX_DST_FIFOA7, "TCX_DST_FIFOA7"},
+ {AMDGPU_GFX_TCX_DST_FIFOB0, "TCX_DST_FIFOB0"},
+ {AMDGPU_GFX_TCX_DST_FIFOB1, "TCX_DST_FIFOB1"},
+ {AMDGPU_GFX_TCX_DST_FIFOB2, "TCX_DST_FIFOB2"},
+ {AMDGPU_GFX_TCX_DST_FIFOB3, "TCX_DST_FIFOB3"},
+ {AMDGPU_GFX_TCX_DST_FIFOB4, "TCX_DST_FIFOB4"},
+ {AMDGPU_GFX_TCX_DST_FIFOB5, "TCX_DST_FIFOB5"},
+ {AMDGPU_GFX_TCX_DST_FIFOB6, "TCX_DST_FIFOB6"},
+ {AMDGPU_GFX_TCX_DST_FIFOB7, "TCX_DST_FIFOB7"},
+ {AMDGPU_GFX_TCX_DST_FIFOD0, "TCX_DST_FIFOD0"},
+ {AMDGPU_GFX_TCX_DST_FIFOD1, "TCX_DST_FIFOD1"},
+ {AMDGPU_GFX_TCX_DST_FIFOD2, "TCX_DST_FIFOD2"},
+ {AMDGPU_GFX_TCX_DST_FIFOD3, "TCX_DST_FIFOD3"},
+ {AMDGPU_GFX_TCX_DST_FIFOD4, "TCX_DST_FIFOD4"},
+ {AMDGPU_GFX_TCX_DST_FIFOD5, "TCX_DST_FIFOD5"},
+ {AMDGPU_GFX_TCX_DST_FIFOD6, "TCX_DST_FIFOD6"},
+ {AMDGPU_GFX_TCX_DST_FIFOD7, "TCX_DST_FIFOD7"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB0, "TCX_DST_FIFO_ACKB0"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB1, "TCX_DST_FIFO_ACKB1"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB2, "TCX_DST_FIFO_ACKB2"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB3, "TCX_DST_FIFO_ACKB3"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB4, "TCX_DST_FIFO_ACKB4"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB5, "TCX_DST_FIFO_ACKB5"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB6, "TCX_DST_FIFO_ACKB6"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB7, "TCX_DST_FIFO_ACKB7"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD0, "TCX_DST_FIFO_ACKD0"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD1, "TCX_DST_FIFO_ACKD1"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD2, "TCX_DST_FIFO_ACKD2"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD3, "TCX_DST_FIFO_ACKD3"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD4, "TCX_DST_FIFO_ACKD4"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD5, "TCX_DST_FIFO_ACKD5"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD6, "TCX_DST_FIFO_ACKD6"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD7, "TCX_DST_FIFO_ACKD7"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_atc_l2_mem_list[] = {
+ {AMDGPU_GFX_ATC_L2_MEM, "ATC_L2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_utcl2_mem_list[] = {
+ {AMDGPU_GFX_UTCL2_MEM, "UTCL2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_mem_list[] = {
+ {AMDGPU_GFX_VML2_MEM, "VML2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_walker_mem_list[] = {
+ {AMDGPU_GFX_VML2_WALKER_MEM, "VML2_WALKER_MEM"},
+};
+
+static const struct amdgpu_gfx_ras_mem_id_entry gfx_v9_4_3_ras_mem_list_array[AMDGPU_GFX_MEM_TYPE_NUM] = {
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_cp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcea_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gc_cane_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcutcl2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gds_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_lds_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_rlc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_spi_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sqc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sq_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_ta_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tca_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tci_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_td_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcx_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_atc_l2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_utcl2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_walker_mem_list)
+};
+
+static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = {
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_CE_ERR_STATUS_LOW, regRLC_CE_ERR_STATUS_HIGH),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"},
+ AMDGPU_GFX_RLC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_CE_ERR_STATUS_LO, regCPC_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_CE_ERR_STATUS_LO, regCPF_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_CE_ERR_STATUS_LO, regCPG_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_CE_ERR_STATUS_LO, regGDS_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"},
+ AMDGPU_GFX_GDS_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_CE_ERR_STATUS_LO, regGC_CANE_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"},
+ AMDGPU_GFX_GC_CANE_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, regSPI_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
+ AMDGPU_GFX_SPI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, regSP0_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
+ AMDGPU_GFX_SP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, regSP1_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
+ AMDGPU_GFX_SP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, regSQ_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
+ AMDGPU_GFX_SQ_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI),
+ 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
+ AMDGPU_GFX_SQC_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, regTCX_CE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
+ AMDGPU_GFX_TCX_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_CE_ERR_STATUS_LO, regTCC_CE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"},
+ AMDGPU_GFX_TCC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
+ AMDGPU_GFX_TA_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, regTCI_CE_EDC_HI_REG),
+ 27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ AMDGPU_GFX_TCI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, regTCP_CE_EDC_HI_REG),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
+ AMDGPU_GFX_TCP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
+ AMDGPU_GFX_TD_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, regGCEA_CE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
+ AMDGPU_GFX_GCEA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, regLDS_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
+ AMDGPU_GFX_LDS_MEM, 4},
+};
+
+static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_UE_ERR_STATUS_LOW, regRLC_UE_ERR_STATUS_HIGH),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"},
+ AMDGPU_GFX_RLC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_UE_ERR_STATUS_LO, regCPC_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_UE_ERR_STATUS_LO, regCPF_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_UE_ERR_STATUS_LO, regCPG_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_UE_ERR_STATUS_LO, regGDS_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"},
+ AMDGPU_GFX_GDS_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_UE_ERR_STATUS_LO, regGC_CANE_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"},
+ AMDGPU_GFX_GC_CANE_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, regSPI_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
+ AMDGPU_GFX_SPI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, regSP0_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
+ AMDGPU_GFX_SP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, regSP1_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
+ AMDGPU_GFX_SP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, regSQ_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
+ AMDGPU_GFX_SQ_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI),
+ 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
+ AMDGPU_GFX_SQC_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_UE_ERR_STATUS_LO, regTCX_UE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
+ AMDGPU_GFX_TCX_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_UE_ERR_STATUS_LO, regTCC_UE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"},
+ AMDGPU_GFX_TCC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO, regTA_UE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
+ AMDGPU_GFX_TA_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG, regTCI_UE_EDC_HI_REG),
+ 27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ AMDGPU_GFX_TCI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG, regTCP_UE_EDC_HI_REG),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
+ AMDGPU_GFX_TCP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO, regTD_UE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
+ AMDGPU_GFX_TD_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO, regTCA_UE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCA"},
+ AMDGPU_GFX_TCA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_UE_ERR_STATUS_LO, regGCEA_UE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
+ AMDGPU_GFX_GCEA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO, regLDS_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
+ AMDGPU_GFX_LDS_MEM, 4},
+};
+
+static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ unsigned long ce_count = 0, ue_count = 0;
+ uint32_t i, j, k;
+
+ /* NOTE: convert xcc_id to physical XCD ID (XCD0 or XCD1) */
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = xcc_id & 0x01 ? 1 : 0,
+ };
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ce_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+ &ce_count);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+ }
+ }
+ }
+
+ /* handle extra register entries of UE */
+ for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+ }
+ }
+ }
+
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ /* the caller should make sure initialize value of
+ * err_data->ue_count and err_data->ce_count
+ */
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+ amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+}
+
+static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ uint32_t i, j, k;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ce_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
+ }
+ }
+ }
+
+ /* handle extra register entries of UE */
+ for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
+ }
+ }
+ }
+
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ uint32_t i;
+ uint32_t data;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG);
+ data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
+ amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0);
+
+ if (amdgpu_watchdog_timer.timeout_fatal_disable &&
+ (amdgpu_watchdog_timer.period < 1 ||
+ amdgpu_watchdog_timer.period > 0x23)) {
+ dev_warn(adev->dev, "Watchdog period range is 1 to 0x23\n");
+ amdgpu_watchdog_timer.period = 0x23;
+ }
+ data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, PERIOD_SEL,
+ amdgpu_watchdog_timer.period);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, 0xffffffff, 0xffffffff, xcc_id);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_TIMEOUT_CONFIG, data);
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_gfx_ras_error_func(adev, ras_error_status,
+ gfx_v9_4_3_inst_query_ras_err_count);
+}
+
+static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
+}
+
+static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer);
+}
+
+static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k;
+ uint32_t xcc_id, xcc_offset, inst_offset;
+ uint32_t num_xcc, reg, num_inst;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ drm_printf(p, "Number of Instances:%d\n", num_xcc);
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ xcc_offset = xcc_id * reg_count;
+ drm_printf(p, "\nInstance id:%d\n", xcc_id);
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_9_4_3[i].reg_name,
+ adev->gfx.ip_dump_core[xcc_offset + i]);
+ }
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+ drm_printf(p, "\nnum_xcc: %d num_mec: %d num_pipe: %d num_queue: %d\n",
+ num_xcc,
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ xcc_offset = xcc_id * reg_count * num_inst;
+ inst_offset = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p,
+ "\nxcc:%d mec:%d, pipe:%d, queue:%d\n",
+ xcc_id, i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_9_4_3[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p,
+ "%-50s \t 0x%08x\n",
+ "regCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues
+ [xcc_offset + inst_offset +
+ reg]);
+ else
+ drm_printf(p,
+ "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_9_4_3[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues
+ [xcc_offset + inst_offset +
+ reg]);
+ }
+ inst_offset += reg_count;
+ }
+ }
+ }
+ }
+}
+
+static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k;
+ uint32_t num_xcc, reg, num_inst;
+ uint32_t xcc_id, xcc_offset, inst_offset;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ xcc_offset = xcc_id * reg_count;
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[xcc_offset + i] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i],
+ GET_INST(GC, xcc_id)));
+ }
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+ mutex_lock(&adev->srbm_mutex);
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ xcc_offset = xcc_id * reg_count * num_inst;
+ inst_offset = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc15_grbm_select(adev, 1 + i, j, k, 0,
+ GET_INST(GC, xcc_id));
+
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_9_4_3[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues
+ [xcc_offset +
+ inst_offset + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+ regCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues
+ [xcc_offset +
+ inst_offset + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(
+ gc_cp_reg_list_9_4_3[reg],
+ GET_INST(GC, xcc_id)));
+ }
+ inst_offset += reg_count;
+ }
+ }
+ }
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = {
+ .name = "gfx_v9_4_3",
+ .early_init = gfx_v9_4_3_early_init,
+ .late_init = gfx_v9_4_3_late_init,
+ .sw_init = gfx_v9_4_3_sw_init,
+ .sw_fini = gfx_v9_4_3_sw_fini,
+ .hw_init = gfx_v9_4_3_hw_init,
+ .hw_fini = gfx_v9_4_3_hw_fini,
+ .suspend = gfx_v9_4_3_suspend,
+ .resume = gfx_v9_4_3_resume,
+ .is_idle = gfx_v9_4_3_is_idle,
+ .wait_for_idle = gfx_v9_4_3_wait_for_idle,
+ .soft_reset = gfx_v9_4_3_soft_reset,
+ .set_clockgating_state = gfx_v9_4_3_set_clockgating_state,
+ .set_powergating_state = gfx_v9_4_3_set_powergating_state,
+ .get_clockgating_state = gfx_v9_4_3_get_clockgating_state,
+ .dump_ip_state = gfx_v9_4_3_ip_dump,
+ .print_ip_state = gfx_v9_4_3_ip_print,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_4_3_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_4_3_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_4_3_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_4_3_ring_emit_gds_switch */
+ 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_4_3_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */
+ 7 + /* gfx_v9_4_3_emit_mem_sync */
+ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */
+ 15 + /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */
+ 2, /* gfx_v9_4_3_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
+ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute,
+ .emit_fence = gfx_v9_4_3_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_4_3_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_4_3_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_4_3_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_4_3_ring_test_ring,
+ .test_ib = gfx_v9_4_3_ring_test_ib,
+ .insert_nop = gfx_v9_4_3_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_4_3_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_4_3_emit_mem_sync,
+ .emit_wave_limit = gfx_v9_4_3_emit_wave_limit,
+ .reset = gfx_v9_4_3_reset_kcq,
+ .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_4_3_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_4_3_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_4_3_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_4_3_ring_emit_gds_switch */
+ 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_4_3_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_4_3_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
+ .emit_fence = gfx_v9_4_3_ring_emit_fence_kiq,
+ .test_ring = gfx_v9_4_3_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v9_4_3_ring_emit_rreg,
+ .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
+};
+
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++)
+ adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs
+ = &gfx_v9_4_3_ring_funcs_compute;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = {
+ .set = gfx_v9_4_3_set_eop_interrupt_state,
+ .process = gfx_v9_4_3_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = {
+ .set = gfx_v9_4_3_set_priv_reg_fault_state,
+ .process = gfx_v9_4_3_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_bad_op_irq_funcs = {
+ .set = gfx_v9_4_3_set_bad_op_fault_state,
+ .process = gfx_v9_4_3_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = {
+ .set = gfx_v9_4_3_set_priv_inst_fault_state,
+ .process = gfx_v9_4_3_priv_inst_irq,
+};
+
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v9_4_3_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs;
+
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v9_4_3_bad_op_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs;
+}
+
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v9_4_3_rlc_funcs;
+}
+
+
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
+{
+ /* 9.4.3 variants removed all the GDS internal memory,
+ * only support GWS opcode in kernel, like barrier
+ * semaphore.etc */
+
+ /* init asic gds info */
+ adev->gds.gds_size = 0;
+ adev->gds.gds_compute_max_wave_id = 0;
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap, int xcc_id)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
+{
+ u32 data, mask;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+
+ return (~data) & mask;
+}
+
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp;
+ unsigned disable_masks[4 * 4];
+ bool is_symmetric_cus;
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ /*
+ * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
+ */
+ if (adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se > 16)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks,
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+ is_symmetric_cus = true;
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
+ gfx_v9_4_3_set_user_cu_inactive_bitmap(
+ adev,
+ disable_masks[i * adev->gfx.config.max_sh_per_se + j],
+ xcc_id);
+ bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);
+
+ cu_info->bitmap[xcc_id][i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ if (i && is_symmetric_cus && prev_counter != counter)
+ is_symmetric_cus = false;
+ prev_counter = counter;
+ }
+ if (is_symmetric_cus) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp);
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ }
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 9,
+ .minor = 4,
+ .rev = 3,
+ .funcs = &gfx_v9_4_3_ip_funcs,
+};
+
+static int gfx_v9_4_3_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp_mask;
+ int i, r;
+
+ /* TODO : Initialize golden regs */
+ /* gfx_v9_4_3_init_golden_registers(adev); */
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask)
+ gfx_v9_4_3_xcc_constants_init(adev, i);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ r = gfx_v9_4_3_xcc_rlc_resume(adev, i);
+ if (r)
+ return r;
+ }
+ }
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ r = gfx_v9_4_3_xcc_cp_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for_each_inst(i, inst_mask)
+ gfx_v9_4_3_xcc_fini(adev, i);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = {
+ .suspend = &gfx_v9_4_3_xcp_suspend,
+ .resume = &gfx_v9_4_3_xcp_resume
+};
+
+struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
+};
+
+static int gfx_v9_4_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__GFX,
+ &gfx_v9_4_3_aca_info,
+ NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_3_ras_ops,
+ .ras_late_init = &gfx_v9_4_3_ras_late_init,
+ },
+ .enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
new file mode 100644
index 000000000000..42d67ee0e7ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_3_H__
+#define __GFX_V9_4_3_H__
+
+extern const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block;
+
+extern struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs;
+
+#endif /* __GFX_V9_4_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm
new file mode 100644
index 000000000000..d5325ef80ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for complitaion change this to main and lower shader main to main_1
+
+// MI300 : Clear SGPRs, VGPRs and LDS
+// Uses two kernels launched separately:
+// 1. Clean VGPRs, LDS, and lower SGPRs
+// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+// Waves in the workgroup share the 64KB of LDS
+// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+// Each wave clears 128 VGPRs, so all 512 in the SIMD
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+// 2. Clean remaining SGPRs
+// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+// Waves are allocating 96 SGPRs
+// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command
+// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
+
+shader main
+ asic(MI300)
+ type(CS)
+ wave_size(64)
+// Note: original source code from SQ team
+
+// (theorhetical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s3 == 1)
+ S_BARRIER
+
+ s_movk_i32 m0, 0x0000
+ s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance)
+ //
+ // CLEAR VGPRs
+ //
+ s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing
+label_0005:
+ v_mov_b32 v0, 0
+ v_mov_b32 v1, 0
+ v_mov_b32 v2, 0
+ v_mov_b32 v3, 0
+ v_mov_b32 v4, 0
+ v_mov_b32 v5, 0
+ v_mov_b32 v6, 0
+ v_mov_b32 v7, 0
+ s_sub_u32 s2, s2, 8
+ s_set_gpr_idx_idx s2
+ s_cbranch_scc0 label_0005
+ s_set_gpr_idx_off
+
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iteraions
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_clean_sgpr_1:
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+ s_sethalt 1
+
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop1:
+
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop1
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0xee //clear vcc
+
+s_endpgm
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h
new file mode 100644
index 000000000000..69aa567c6c1d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_9_4_3 */
+static const u32 gfx_9_4_3_cleaner_shader_hex[] = {
+ 0xbf068100, 0xbf84003b,
+ 0xbf8a0000, 0xb07c0000,
+ 0xbe8200ff, 0x00000078,
+ 0xbf110802, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0x7e060280, 0x7e080280,
+ 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x80828802,
+ 0xbe803202, 0xbf84fff5,
+ 0xbf9c0000, 0xbe8200ff,
+ 0x80000000, 0x86020102,
+ 0xbf840011, 0xbefe00c1,
+ 0xbeff00c1, 0xd28c0001,
+ 0x0001007f, 0xd28d0001,
+ 0x0002027e, 0x10020288,
+ 0xbe8200bf, 0xbefc00c1,
+ 0xd89c2000, 0x00020201,
+ 0xd89c6040, 0x00040401,
+ 0x320202ff, 0x00000400,
+ 0x80828102, 0xbf84fff8,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbf810000, 0xbf8d0001,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea01ff,
+ 0x000000ee, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
new file mode 100644
index 000000000000..f9949fedfbb9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
@@ -0,0 +1,516 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v11_5_0.h"
+
+#include "gc/gc_11_5_0_offset.h"
+#include "gc/gc_11_5_0_sh_mask.h"
+
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regGCVM_L2_CNTL3_DEFAULT 0x80100007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+
+static const char *gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v11_5_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v11_5_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v11_5_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v11_5_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v11_5_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v11_5_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v11_5_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v11_5_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+static void gfxhub_v11_5_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v11_5_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v11_5_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v11_5_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v11_5_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v11_5_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v11_5_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v11_5_0_init_gart_aperture_regs(adev);
+ gfxhub_v11_5_0_init_system_aperture_regs(adev);
+ gfxhub_v11_5_0_init_tlb_regs(adev);
+ gfxhub_v11_5_0_init_cache_regs(adev);
+
+ gfxhub_v11_5_0_enable_system_domain(adev);
+ gfxhub_v11_5_0_disable_identity_aperture(adev);
+ gfxhub_v11_5_0_setup_vmid_config(adev);
+ gfxhub_v11_5_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v11_5_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v11_5_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v11_5_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* NO halt CP when page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v11_5_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v11_5_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v11_5_0_get_invalidate_req,
+};
+
+static void gfxhub_v11_5_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v11_5_0_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v11_5_0_funcs = {
+ .get_fb_location = gfxhub_v11_5_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v11_5_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v11_5_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v11_5_0_gart_enable,
+ .gart_disable = gfxhub_v11_5_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v11_5_0_set_fault_enable_default,
+ .init = gfxhub_v11_5_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h
new file mode 100644
index 000000000000..265ab631b3d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V11_5_0_H__
+#define __GFXHUB_V11_5_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v11_5_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
new file mode 100644
index 000000000000..7609b9cecae8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
@@ -0,0 +1,521 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v12_0.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "soc15_common.h"
+
+#define regGCVM_L2_CNTL3_DEFAULT 0x80120007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+#define regGRBM_GFX_INDEX_DEFAULT 0xe0000000
+
+static const char *gfxhub_client_ids[] = {
+ "CB",
+ "DB",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG/PC/SC",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "Reserved",
+ "Reserved",
+ "WGS",
+ "DSM",
+ "PA"
+};
+
+static uint32_t gfxhub_v12_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v12_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, RW));
+}
+
+static u64 gfxhub_v12_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v12_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v12_0_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v12_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v12_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v12_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v12_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v12_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v12_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v12_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v12_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v12_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v12_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v12_0_init_gart_aperture_regs(adev);
+ gfxhub_v12_0_init_system_aperture_regs(adev);
+ gfxhub_v12_0_init_tlb_regs(adev);
+ gfxhub_v12_0_init_cache_regs(adev);
+
+ gfxhub_v12_0_enable_system_domain(adev);
+ gfxhub_v12_0_disable_identity_aperture(adev);
+ gfxhub_v12_0_setup_vmid_config(adev);
+ gfxhub_v12_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v12_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v12_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v12_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* NO halt CP when page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v12_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v12_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v12_0_get_invalidate_req,
+};
+
+static void gfxhub_v12_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v12_0_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs = {
+ .get_fb_location = gfxhub_v12_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v12_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v12_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v12_0_gart_enable,
+ .gart_disable = gfxhub_v12_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v12_0_set_fault_enable_default,
+ .init = gfxhub_v12_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h
new file mode 100644
index 000000000000..f1258265f802
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V12_0_H__
+#define __GFXHUB_V12_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 6ddd53ba8b77..a7bfc9f41d0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -40,7 +40,7 @@ static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -53,37 +53,59 @@ static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
- uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint64_t pt_base;
- gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
- WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->gmc.gart_start >> 12));
- WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->gmc.gart_start >> 44));
+ gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
- WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- (u32)(adev->gmc.gart_end >> 12));
- WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- (u32)(adev->gmc.gart_end >> 44));
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.fb_start >> 12));
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.fb_start >> 44));
+
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ } else {
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
}
static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
- /* Program the AGP BAR */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
-
if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
/* Program the system aperture low logical page number. */
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- /*
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
+ /*
* Raven2 has a HW issue that it is unable to use the
* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
* So here is the workaround that increase system
@@ -100,8 +122,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -116,6 +137,18 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
}
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+ WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
}
static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
@@ -131,7 +164,6 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
@@ -173,8 +205,13 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL4, tmp);
}
@@ -184,7 +221,10 @@ static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp);
@@ -209,8 +249,8 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned num_level, block_size;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int num_level, block_size;
uint32_t tmp;
int i;
@@ -222,7 +262,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
block_size -= 9;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -244,10 +284,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
- /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ /* Send no-retry XNACK on fault to suppress VM fault storm.
+ * On Aldebaran, XNACK can be enabled in the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
- !adev->gmc.noretry);
+ !adev->gmc.noretry ||
+ adev->asic_type == CHIP_ALDEBARAN);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
@@ -265,8 +309,8 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned i;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
@@ -278,18 +322,6 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) {
- /*
- * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
- * VF copy registers so vbios post doesn't program them, for
- * SRIOV driver need to program them
- */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,
- adev->gmc.vram_start >> 24);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,
- adev->gmc.vram_end >> 24);
- }
-
/* GART Enable. */
gfxhub_v1_0_init_gart_aperture_regs(adev);
gfxhub_v1_0_init_system_aperture_regs(adev);
@@ -308,7 +340,7 @@ static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -317,6 +349,10 @@ static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
+ if (amdgpu_sriov_vf(adev))
+ /* Avoid write to GMC registers */
+ return;
+
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
@@ -341,6 +377,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
+
tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
@@ -377,7 +414,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
static void gfxhub_v1_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -406,7 +443,6 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev)
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}
-
const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = {
.get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset,
.setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
index c0ab71df0d90..90f0aefbdb39 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -28,13 +28,44 @@
#include "soc15_common.h"
+#define mmMC_VM_XGMI_LFB_CNTL_ALDE 0x0978
+#define mmMC_VM_XGMI_LFB_CNTL_ALDE_BASE_IDX 0
+#define mmMC_VM_XGMI_LFB_SIZE_ALDE 0x0979
+#define mmMC_VM_XGMI_LFB_SIZE_ALDE_BASE_IDX 0
+//MC_VM_XGMI_LFB_CNTL
+#define MC_VM_XGMI_LFB_CNTL_ALDE__PF_LFB_REGION__SHIFT 0x0
+#define MC_VM_XGMI_LFB_CNTL_ALDE__PF_MAX_REGION__SHIFT 0x4
+#define MC_VM_XGMI_LFB_CNTL_ALDE__PF_LFB_REGION_MASK 0x0000000FL
+#define MC_VM_XGMI_LFB_CNTL_ALDE__PF_MAX_REGION_MASK 0x000000F0L
+//MC_VM_XGMI_LFB_SIZE
+#define MC_VM_XGMI_LFB_SIZE_ALDE__PF_LFB_SIZE__SHIFT 0x0
+#define MC_VM_XGMI_LFB_SIZE_ALDE__PF_LFB_SIZE_MASK 0x0001FFFFL
+
int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
{
- u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
- u32 max_region =
- REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
- u32 max_num_physical_nodes = 0;
- u32 max_physical_node_id = 0;
+ u32 max_num_physical_nodes;
+ u32 max_physical_node_id;
+ u32 xgmi_lfb_cntl;
+ u32 max_region;
+ u64 seg_size;
+
+ if (adev->asic_type == CHIP_ALDEBARAN) {
+ xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL_ALDE);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE_ALDE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL_ALDE, PF_MAX_REGION);
+ } else {
+ xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+ }
+
+
switch (adev->asic_type) {
case CHIP_VEGA20:
@@ -45,23 +76,35 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
max_num_physical_nodes = 8;
max_physical_node_id = 7;
break;
+ case CHIP_ALDEBARAN:
+ max_num_physical_nodes = 16;
+ max_physical_node_id = 15;
+ break;
default:
return -EINVAL;
}
/* PF_MAX_REGION=0 means xgmi is disabled */
- if (max_region) {
+ if (max_region || adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
return -EINVAL;
- adev->gmc.xgmi.physical_node_id =
- REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
+ if (adev->asic_type == CHIP_ALDEBARAN) {
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL_ALDE,
+ PF_LFB_REGION);
+ } else {
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
+ PF_LFB_REGION);
+ }
+
if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
return -EINVAL;
- adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
- RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
- MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+
+ adev->gmc.xgmi.node_segment_size = seg_size;
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
new file mode 100644
index 000000000000..6c03bf9f1ae8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -0,0 +1,674 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "gfxhub_v1_2.h"
+#include "gfxhub_v1_1.h"
+
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+
+#define regVM_L2_CNTL3_DEFAULT 0x80100007
+#define regVM_L2_CNTL4_DEFAULT 0x000000c1
+
+static u64 gfxhub_v1_2_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v1_2_xcc_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
+}
+
+static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, vmid, page_table_base, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ?
+ adev->gmc.vram_start : adev->gmc.fb_start;
+ uint64_t pt_base;
+ int i;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, 0, pt_base, xcc_mask);
+
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ for_each_inst(i, xcc_mask) {
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(gart_start >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(gart_start >> 44));
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ } else {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+ }
+}
+
+static void
+gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint64_t value;
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
+ /*
+ * Raven2 has a HW issue that it is unable to use the
+ * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+ * So here is the workaround that increase system
+ * aperture high address (add 1) to get rid of the VM
+ * fault and hardware hang.
+ */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo && adev->gmc.xgmi.connected_to_cpu) {
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+ }
+}
+
+static void gfxhub_v1_2_xcc_init_tlb_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+}
+
+static void gfxhub_v1_2_xcc_init_cache_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs setup WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL4, tmp);
+ }
+}
+
+static void gfxhub_v1_2_xcc_enable_system_domain(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL, tmp);
+ }
+}
+
+static void
+gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ }
+}
+
+static inline bool
+gfxhub_v1_2_per_process_xnack_support(struct amdgpu_device *adev)
+{
+ /*
+ * TODO: Check if this function is really needed, so far only 9.4.3
+ * variants use GFXHUB 1.2
+ */
+ return !!adev->aid_mask;
+}
+
+static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int num_level, block_size;
+ uint32_t tmp;
+ int i, j;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* Send no-retry XNACK on fault to suppress VM fault storm.
+ * On 9.4.3 variants, XNACK can be enabled in
+ * the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
+ tmp = REG_SET_FIELD(
+ tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry ||
+ gfxhub_v1_2_per_process_xnack_support(
+ adev));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+ }
+}
+
+static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+ }
+}
+
+static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ /* GART Enable. */
+ gfxhub_v1_2_xcc_init_gart_aperture_regs(adev, xcc_mask);
+ gfxhub_v1_2_xcc_init_system_aperture_regs(adev, xcc_mask);
+ gfxhub_v1_2_xcc_init_tlb_regs(adev, xcc_mask);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_init_cache_regs(adev, xcc_mask);
+
+ gfxhub_v1_2_xcc_enable_system_domain(adev, xcc_mask);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_disable_identity_aperture(adev, xcc_mask);
+ gfxhub_v1_2_xcc_setup_vmid_config(adev, xcc_mask);
+ gfxhub_v1_2_xcc_program_invalidation(adev, xcc_mask);
+
+ return 0;
+}
+
+static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ return gfxhub_v1_2_xcc_gart_enable(adev, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp;
+ u32 i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+ }
+ }
+}
+
+static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_gart_disable(adev, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value,
+ uint32_t xcc_mask)
+{
+ u32 tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+ }
+}
+
+/**
+ * gfxhub_v1_2_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL -
+ regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ -
+ regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance =
+ regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
+}
+
+static void gfxhub_v1_2_init(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_init(adev, xcc_mask);
+}
+
+static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 max_num_physical_nodes;
+ u32 max_physical_node_id;
+ u32 xgmi_lfb_cntl;
+ u32 max_region;
+ u64 seg_size;
+
+ xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_CNTL);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_SIZE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+
+
+
+ max_num_physical_nodes = 8;
+ max_physical_node_id = 7;
+
+ /* PF_MAX_REGION=0 means xgmi is disabled */
+ if (max_region || adev->gmc.xgmi.connected_to_cpu) {
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+ return -EINVAL;
+
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
+ PF_LFB_REGION);
+
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+ return -EINVAL;
+
+ adev->gmc.xgmi.node_segment_size = seg_size;
+ }
+
+ return 0;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
+ .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v1_2_gart_enable,
+ .gart_disable = gfxhub_v1_2_gart_disable,
+ .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default,
+ .init = gfxhub_v1_2_init,
+ .get_xgmi_info = gfxhub_v1_2_get_xgmi_info,
+};
+
+static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool value;
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ value = false;
+ else
+ value = true;
+
+ gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, inst_mask);
+
+ if (!amdgpu_sriov_vf(adev))
+ return gfxhub_v1_2_xcc_gart_enable(adev, inst_mask);
+
+ return 0;
+}
+
+static int gfxhub_v1_2_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_gart_disable(adev, inst_mask);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs = {
+ .suspend = &gfxhub_v1_2_xcp_suspend,
+ .resume = &gfxhub_v1_2_xcp_resume
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
new file mode 100644
index 000000000000..997e9f90c990
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V1_2_H__
+#define __GFXHUB_V1_2_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs;
+
+extern struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 2aecc6a243e8..793faf62cb07 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -31,7 +31,7 @@
#include "soc15_common.h"
-static const char *gfxhub_client_ids[] = {
+static const char * const gfxhub_client_ids[] = {
"CB/DB",
"Reserved",
"GE1",
@@ -120,7 +120,7 @@ static u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -165,8 +165,7 @@ static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
- + adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -197,7 +196,6 @@ static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
- tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
@@ -284,12 +282,12 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -327,12 +325,14 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ hub->vm_cntx_cntl = tmp;
}
static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned i;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
@@ -360,7 +360,7 @@ static int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -393,6 +393,7 @@ static void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
+
tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
@@ -433,7 +434,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_0_vmhub_funcs = {
static void gfxhub_v2_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -470,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+ /* TODO: This is only needed on some Navi 1x revisions */
+ hub->sdma_invalidation_workaround = true;
+
hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index 410fd3a1a388..deb95fab02df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -31,7 +31,10 @@
#include "soc15_common.h"
-static const char *gfxhub_client_ids[] = {
+#define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP 0x16f8
+#define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP_BASE_IDX 0
+
+static const char * const gfxhub_client_ids[] = {
"CB/DB",
"Reserved",
"GE1",
@@ -120,7 +123,7 @@ static u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -152,6 +155,9 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
/* Program the AGP BAR */
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
@@ -164,8 +170,7 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
- + adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -195,7 +200,6 @@ static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev)
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
- tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
@@ -290,12 +294,12 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -333,12 +337,14 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ hub->vm_cntx_cntl = tmp;
}
static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned i;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
@@ -378,7 +384,7 @@ static int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -394,6 +400,9 @@ static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+ if (amdgpu_sriov_vf(adev))
+ return;
+
/* Setup L2 cache */
WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
@@ -456,7 +465,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_1_vmhub_funcs = {
static void gfxhub_v2_1_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -496,40 +505,151 @@ static void gfxhub_v2_1_init(struct amdgpu_device *adev)
hub->vmhub_funcs = &gfxhub_v2_1_vmhub_funcs;
}
-static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev)
+static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
{
- u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL);
- u32 max_region =
- REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
- u32 max_num_physical_nodes = 0;
- u32 max_physical_node_id = 0;
-
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- max_num_physical_nodes = 4;
- max_physical_node_id = 3;
+ int i;
+ u32 tmp = 0, disabled_sa = 0;
+ u32 efuse_setting, vbios_setting;
+
+ u32 max_sa_mask = amdgpu_gfx_create_bitmask(
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ /* Get SA disabled bitmap from eFuse setting */
+ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE);
+ efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+ efuse_setting >>= CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+ /* Get SA disabled bitmap from VBIOS setting */
+ vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SA_UNIT_DISABLE);
+ vbios_setting &= GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+ vbios_setting >>= GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+ disabled_sa |= efuse_setting | vbios_setting;
+ /* Make sure not to report harvested SAs beyond the max SA count */
+ disabled_sa &= max_sa_mask;
+
+ for (i = 0; disabled_sa > 0; i++) {
+ if (disabled_sa & 1)
+ tmp |= 0x3 << (i * 2);
+ disabled_sa >>= 1;
+ }
+ disabled_sa = tmp;
+
+ WREG32_SOC15(GC, 0, mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP, disabled_sa);
break;
default:
- return -EINVAL;
+ break;
}
+}
- /* PF_MAX_REGION=0 means xgmi is disabled */
- if (max_region) {
- adev->gmc.xgmi.num_physical_nodes = max_region + 1;
- if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
- return -EINVAL;
+static void gfxhub_v2_1_save_regs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gmc.VM_L2_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL);
+ adev->gmc.VM_L2_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32);
+ adev->gmc.VM_L2_PROTECTION_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+ adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2);
+ adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3);
+ adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4);
+ adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32);
+ adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32);
+ adev->gmc.VM_DEBUG = RREG32_SOC15(GC, 0, mmGCVM_DEBUG);
+ adev->gmc.VM_L2_MM_GROUP_RT_CLASSES = RREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES);
+ adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID);
+ adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2 = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2);
+ adev->gmc.VM_L2_CACHE_PARITY_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL);
+ adev->gmc.VM_L2_IH_LOG_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL);
+
+ for (i = 0; i <= 15; i++) {
+ adev->gmc.VM_CONTEXT_CNTL[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2);
+ }
- adev->gmc.xgmi.physical_node_id =
- REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
- if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
- return -EINVAL;
+ adev->gmc.MC_VM_MX_L1_TLB_CNTL = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+}
+
+static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev)
+{
+ int i;
- adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
- RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE),
- GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, adev->gmc.VM_L2_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, adev->gmc.VM_L2_CNTL2);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL, adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32);
+ WREG32_SOC15(GC, 0, mmGCVM_DEBUG, adev->gmc.VM_DEBUG);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES, adev->gmc.VM_L2_MM_GROUP_RT_CLASSES);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL, adev->gmc.VM_L2_CACHE_PARITY_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL, adev->gmc.VM_L2_IH_LOG_CNTL);
+
+ for (i = 0; i <= 15; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, adev->gmc.VM_CONTEXT_CNTL[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i]);
}
- return 0;
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL);
+}
+
+static void gfxhub_v2_1_halt(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+ int time = 1000;
+
+ gfxhub_v2_1_set_fault_enable_default(adev, false);
+
+ for (i = 0; i <= 14; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, ~0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, ~0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ 0);
+ }
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK |
+ GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 &&
+ time) {
+ udelay(100);
+ time--;
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ }
+
+ if (!time)
+ DRM_WARN("failed to wait for GRBM(EA) idle\n");
}
const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
@@ -540,5 +660,8 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.gart_disable = gfxhub_v2_1_gart_disable,
.set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default,
.init = gfxhub_v2_1_init,
- .get_xgmi_info = gfxhub_v2_1_get_xgmi_info,
+ .utcl2_harvest = gfxhub_v2_1_utcl2_harvest,
+ .mode2_save_regs = gfxhub_v2_1_save_regs,
+ .mode2_restore_regs = gfxhub_v2_1_restore_regs,
+ .halt = gfxhub_v2_1_halt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
new file mode 100644
index 000000000000..abe30c8bd2ba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v3_0.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+static const char * const gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v3_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v3_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v3_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v3_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v3_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v3_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v3_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v3_0_init_gart_aperture_regs(adev);
+ gfxhub_v3_0_init_system_aperture_regs(adev);
+ gfxhub_v3_0_init_tlb_regs(adev);
+ gfxhub_v3_0_init_cache_regs(adev);
+
+ gfxhub_v3_0_enable_system_domain(adev);
+ gfxhub_v3_0_disable_identity_aperture(adev);
+ gfxhub_v3_0_setup_vmid_config(adev);
+ gfxhub_v3_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v3_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v3_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* NO halt CP when page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v3_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v3_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v3_0_get_invalidate_req,
+};
+
+static void gfxhub_v3_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v3_0_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v3_0_funcs = {
+ .get_fb_location = gfxhub_v3_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v3_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v3_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v3_0_gart_enable,
+ .gart_disable = gfxhub_v3_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v3_0_set_fault_enable_default,
+ .init = gfxhub_v3_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h
new file mode 100644
index 000000000000..ea345e4e072a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V3_0_H__
+#define __GFXHUB_V3_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v3_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
new file mode 100644
index 000000000000..b3ef6e71811f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v3_0_3.h"
+
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regGCVM_L2_CNTL3_DEFAULT 0x80100007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char * const gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v3_0_3_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v3_0_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v3_0_3_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v3_0_3_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v3_0_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v3_0_3_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v3_0_3_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Disable AGP. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v3_0_3_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_3_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v3_0_3_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_3_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ gfxhub_v3_0_3_init_gart_aperture_regs(adev);
+ gfxhub_v3_0_3_init_system_aperture_regs(adev);
+ gfxhub_v3_0_3_init_tlb_regs(adev);
+ gfxhub_v3_0_3_init_cache_regs(adev);
+
+ gfxhub_v3_0_3_enable_system_domain(adev);
+ gfxhub_v3_0_3_disable_identity_aperture(adev);
+ gfxhub_v3_0_3_setup_vmid_config(adev);
+ gfxhub_v3_0_3_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v3_0_3_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v3_0_3_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v3_0_3_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v3_0_3_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v3_0_3_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v3_0_3_get_invalidate_req,
+};
+
+static void gfxhub_v3_0_3_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v3_0_3_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v3_0_3_funcs = {
+ .get_fb_location = gfxhub_v3_0_3_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v3_0_3_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v3_0_3_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v3_0_3_gart_enable,
+ .gart_disable = gfxhub_v3_0_3_gart_disable,
+ .set_fault_enable_default = gfxhub_v3_0_3_set_fault_enable_default,
+ .init = gfxhub_v3_0_3_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h
new file mode 100644
index 000000000000..6153bd5e3083
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V3_0_3_H__
+#define __GFXHUB_V3_0_3_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v3_0_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3b7c6c31fce1..ce6e04242c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -22,6 +22,9 @@
*/
#include <linux/firmware.h>
#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v10_0.h"
@@ -48,16 +51,9 @@
#include "athub_v2_0.h"
#include "athub_v2_1.h"
-#if 0
-static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
-{
- /* TODO add golden setting for hdp */
-};
-#endif
-
static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
return 0;
@@ -65,21 +61,33 @@ static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
static int
gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src, unsigned type,
+ struct amdgpu_irq_src *src, unsigned int type,
enum amdgpu_interrupt_state state)
{
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
/* MM HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
/* GFX HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false);
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
break;
case AMDGPU_IRQ_STATE_ENABLE:
/* MM HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
/* GFX HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true);
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
break;
default:
break;
@@ -92,9 +100,14 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- bool retry_fault = !!(entry->src_data[1] & 0x80);
- struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
- struct amdgpu_task_info task_info;
+ uint32_t vmhub_index = entry->client_id == SOC15_IH_CLIENTID_VMC ?
+ AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+ struct amdgpu_task_info *task_info;
uint32_t status = 0;
u64 addr;
@@ -106,14 +119,14 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
/* Process it onyl if it's the first fault for this address */
if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
entry->timestamp))
return 1;
/* Delegate it to a different ring if the hardware hasn't
* already done it.
*/
- if (in_interrupt()) {
+ if (entry->ih == &adev->irq.ih) {
amdgpu_irq_delegate(adev, entry, 8);
return 1;
}
@@ -121,7 +134,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
/* Try to handle the recoverable page faults by filling page
* tables
*/
- if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
return 1;
}
@@ -131,31 +145,39 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
* be updated to avoid reading an incorrect value due to
* the new fast GRBM interface.
*/
- if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
- (adev->asic_type < CHIP_SIENNA_CICHLID))
+ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(10, 3, 0)))
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
}
if (!printk_ratelimit())
return 0;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_err(adev->dev,
- "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
- "for process %s pid %d thread %s pid %d)\n",
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
- entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
- dev_err(adev->dev, " in page starting at address 0x%012llx from client %d\n",
- addr, entry->client_id);
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
- if (!amdgpu_sriov_vf(adev))
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client 0x%x (%s)\n",
+ addr, entry->client_id,
+ soc15_ih_clientid_name[entry->client_id]);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
hub->vmhub_funcs->print_l2_protection_fault_status(adev,
status);
@@ -193,8 +215,7 @@ static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
- return ((vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) &&
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
(!amdgpu_sriov_vf(adev)));
}
@@ -218,16 +239,48 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
* by the amdgpu vm/hsa code.
*/
-static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
- unsigned int vmhub, uint32_t flush_type)
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 tmp;
/* Use register 17 for GART */
- const unsigned eng = 17;
+ const unsigned int eng = 17;
+ unsigned char hub_ip = 0;
+ u32 sem, req, ack;
unsigned int i;
+ u32 tmp;
+
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
+
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -241,8 +294,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng);
+ tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
@@ -252,20 +304,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
+ WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
/*
* Issue a dummy read to wait for the ACK register to be cleared
* to avoid a false ACK due to the new fast GRBM interface.
*/
- if ((vmhub == AMDGPU_GFXHUB_0) &&
- (adev->asic_type < CHIP_SIENNA_CICHLID))
- RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng);
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0)))
+ RREG32_RLC_NO_KIQ(req, hub_ip);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
- hub->eng_distance * eng);
+ tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
@@ -275,113 +326,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore)
- /*
- * add semaphore release after invalidation,
- * write with 0 means semaphore release
- */
- WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0);
+ WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
spin_unlock(&adev->gmc.invalidate_lock);
- if (i < adev->usec_timeout)
- return;
-
- DRM_ERROR("Timeout waiting for VM flush ACK!\n");
-}
-
-/**
- * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: vmhub type
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
- uint32_t vmhub, uint32_t flush_type)
-{
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct dma_fence *fence;
- struct amdgpu_job *job;
-
- int r;
-
- /* flush hdp cache */
- adev->hdp.funcs->flush_hdp(adev, NULL);
-
- /* For SRIOV run time, driver shouldn't access the register through MMIO
- * Directly use kiq to do the vm invalidation instead
- */
- if (adev->gfx.kiq.ring.sched.ready &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- down_read_trylock(&adev->reset_sem)) {
- struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
- const unsigned eng = 17;
- u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
- u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid);
-
- up_read(&adev->reset_sem);
- return;
- }
-
- mutex_lock(&adev->mman.gtt_window_lock);
-
- if (vmhub == AMDGPU_MMHUB_0) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
- return;
- }
-
- BUG_ON(vmhub != AMDGPU_GFXHUB_0);
-
- if (!adev->mman.buffer_funcs_enabled ||
- !adev->ib_pool_ready ||
- amdgpu_in_reset(adev) ||
- ring->sched.ready == false) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
- return;
- }
-
- /* The SDMA on Navi has a bug which can theoretically result in memory
- * corruption if an invalidation happens at the same time as an VA
- * translation. Avoid this by doing the invalidation from the SDMA
- * itself.
- */
- r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
- &job);
- if (r)
- goto error_alloc;
-
- job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
- job->vm_needs_flush = true;
- job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
- if (r)
- goto error_submit;
-
- mutex_unlock(&adev->mman.gtt_window_lock);
-
- dma_fence_wait(fence, false);
- dma_fence_put(fence);
-
- return;
-
-error_submit:
- amdgpu_job_free(job);
-
-error_alloc:
- mutex_unlock(&adev->mman.gtt_window_lock);
- DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n",
+ vmhub);
}
/**
@@ -391,72 +342,44 @@ error_alloc:
* @pasid: pasid to be flush
* @flush_type: the flush type
* @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB()
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ uint16_t queried;
int vmid, i;
- signed long r;
- uint32_t seq;
- uint16_t queried_pasid;
- bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
- if (amdgpu_emu_mode == 0 && ring->sched.ready) {
- spin_lock(&adev->gfx.kiq.ring_lock);
- /* 2 dwords flush + 8 dwords fence */
- amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
- return -ETIME;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
- }
for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
-
- ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- if (all_hub) {
- for (i = 0; i < adev->num_vmhubs; i++)
- gmc_v10_0_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- } else {
- gmc_v10_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, flush_type);
- }
- break;
+ bool valid;
+
+ valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
}
}
-
- return 0;
}
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
- bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
- unsigned eng = ring->vm_inv_eng;
+ unsigned int eng = ring->vm_inv_eng;
/*
* It may lose gpuvm invalidate acknowldege state across power-gating
@@ -498,13 +421,13 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
- unsigned pasid)
+static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -545,30 +468,11 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
* 0 valid
*/
-static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-{
- switch (flags) {
- case AMDGPU_VM_MTYPE_DEFAULT:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_NC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_WC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
- case AMDGPU_VM_MTYPE_CC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
- case AMDGPU_VM_MTYPE_UC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
- default:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- }
-}
-
static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
- *addr = adev->vm_manager.vram_base_offset + *addr -
- adev->gmc.vram_start;
+ *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
BUG_ON(*addr & 0xFFFF00000000003FULL);
if (!adev->gmc.translate_further)
@@ -588,28 +492,56 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
- *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+ break;
+ }
- if (mapping->flags & AMDGPU_PTE_PRT) {
+ if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+ *flags |= AMDGPU_PTE_NOALLOC;
+ else
+ *flags &= ~AMDGPU_PTE_NOALLOC;
+
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags |= AMDGPU_PTE_SNOOPED;
*flags |= AMDGPU_PTE_LOG;
*flags |= AMDGPU_PTE_SYSTEM;
*flags &= ~AMDGPU_PTE_VALID;
}
+
+ if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED))
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
}
-static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
@@ -633,7 +565,6 @@ static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
- .map_mtype = gmc_v10_0_map_mtype,
.get_vm_pde = gmc_v10_0_get_vm_pde,
.get_vm_pte = gmc_v10_0_get_vm_pte,
.get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size,
@@ -647,25 +578,27 @@ static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 7, 0):
adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
+ adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
- adev->umc.funcs = &umc_v8_7_funcs;
+ adev->umc.ras = &umc_v8_7_ras;
break;
default:
break;
}
}
-
static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_VANGOGH:
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 4, 0):
+ case IP_VERSION(2, 4, 1):
adev->mmhub.funcs = &mmhub_v2_3_funcs;
break;
default:
@@ -676,11 +609,15 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfxhub.funcs = &gfxhub_v2_1_funcs;
break;
default:
@@ -690,9 +627,9 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
}
-static int gmc_v10_0_early_init(void *handle)
+static int gmc_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v10_0_set_mmhub_funcs(adev);
gmc_v10_0_set_gfxhub_funcs(adev);
@@ -706,13 +643,14 @@ static int gmc_v10_0_early_init(void *handle)
adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
-static int gmc_v10_0_late_init(void *handle)
+static int gmc_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -736,9 +674,11 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
- amdgpu_gmc_gart_location(adev, mc);
- amdgpu_gmc_agp_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -775,33 +715,30 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU) {
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
- /* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
- if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
- adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
default:
adev->gmc.gart_size = 512ULL << 20;
break;
+ case IP_VERSION(10, 3, 1): /* DCE SG support */
+ case IP_VERSION(10, 3, 3): /* DCE SG support */
+ case IP_VERSION(10, 3, 6): /* DCE SG support */
+ case IP_VERSION(10, 3, 7): /* DCE SG support */
+ adev->gmc.gart_size = 1024ULL << 20;
+ break;
}
- } else
+ } else {
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
gmc_v10_0_vram_gtt_location(adev, &adev->gmc);
@@ -823,16 +760,16 @@ static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
- adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);
}
-static int gmc_v10_0_sw_init(void *handle)
+static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfxhub.funcs->init(adev);
@@ -855,15 +792,40 @@ static int gmc_v10_0_sw_init(void *handle)
adev->gmc.vram_vendor = vram_vendor;
}
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- adev->num_vmhubs = 2;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ adev->gmc.mall_size = 128 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 2):
+ adev->gmc.mall_size = 96 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 4):
+ adev->gmc.mall_size = 32 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 5):
+ adev->gmc.mall_size = 16 * 1024 * 1024;
+ break;
+ default:
+ adev->gmc.mall_size = 0;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,
@@ -905,15 +867,11 @@ static int gmc_v10_0_sw_init(void *handle)
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
return r;
}
- if (adev->gmc.xgmi.supported) {
- r = adev->gfxhub.funcs->get_xgmi_info(adev);
- if (r)
- return r;
- }
+ adev->need_swiotlb = drm_need_swiotlb(44);
r = gmc_v10_0_mc_init(adev);
if (r)
@@ -940,11 +898,15 @@ static int gmc_v10_0_sw_init(void *handle)
amdgpu_vm_manager_init(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
- * gmc_v8_0_gart_fini - vm fini callback
+ * gmc_v10_0_gart_fini - vm fini callback
*
* @adev: amdgpu_device pointer
*
@@ -953,12 +915,11 @@ static int gmc_v10_0_sw_init(void *handle)
static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
{
amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
}
-static int gmc_v10_0_sw_fini(void *handle)
+static int gmc_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_vm_manager_fini(adev);
gmc_v10_0_gart_fini(adev);
@@ -970,18 +931,6 @@ static int gmc_v10_0_sw_fini(void *handle)
static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- break;
- default:
- break;
- }
}
/**
@@ -999,13 +948,13 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
- r = adev->gfxhub.funcs->gart_enable(adev);
- if (r)
- return r;
+ if (!adev->in_s0ix) {
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+ }
r = adev->mmhub.funcs->gart_enable(adev);
if (r)
@@ -1014,37 +963,51 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
adev->hdp.funcs->init_registers(adev);
/* Flush HDP after it is initialized */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
- value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
- false : true;
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
- adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
adev->mmhub.funcs->set_fault_enable_default(adev, value);
- gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
- gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+ if (!adev->in_s0ix)
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
- adev->gart.ready = true;
-
return 0;
}
-static int gmc_v10_0_hw_init(void *handle)
+static int gmc_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
/* The sequence of these two function calls matters.*/
gmc_v10_0_init_golden_registers(adev);
+ /*
+ * harvestable groups in gc_utcl2 need to be programmed before any GFX block
+ * register setup within GMC, or else system hang when harvesting SA.
+ */
+ if (!adev->in_s0ix && adev->gfxhub.funcs && adev->gfxhub.funcs->utcl2_harvest)
+ adev->gfxhub.funcs->utcl2_harvest(adev);
+
r = gmc_v10_0_gart_enable(adev);
if (r)
return r;
+ if (amdgpu_emu_mode == 1) {
+ r = amdgpu_gmc_vram_checking(adev);
+ if (r)
+ return r;
+ }
+
if (adev->umc.funcs && adev->umc.funcs->init_registers)
adev->umc.funcs->init_registers(adev);
@@ -1060,14 +1023,16 @@ static int gmc_v10_0_hw_init(void *handle)
*/
static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
- adev->gfxhub.funcs->gart_disable(adev);
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->gart_disable(adev);
adev->mmhub.funcs->gart_disable(adev);
- amdgpu_gart_table_vram_unpin(adev);
}
-static int gmc_v10_0_hw_fini(void *handle)
+static int gmc_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ gmc_v10_0_gart_disable(adev);
if (amdgpu_sriov_vf(adev)) {
/* full access mode, so don't touch any GMC register */
@@ -1075,84 +1040,91 @@ static int gmc_v10_0_hw_fini(void *handle)
return 0;
}
- amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
- gmc_v10_0_gart_disable(adev);
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
return 0;
}
-static int gmc_v10_0_suspend(void *handle)
+static int gmc_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v10_0_hw_fini(adev);
+ gmc_v10_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v10_0_resume(void *handle)
+static int gmc_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v10_0_hw_init(adev);
+ r = gmc_v10_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v10_0_is_idle(void *handle)
+static bool gmc_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v10.*/
return true;
}
-static int gmc_v10_0_wait_for_idle(void *handle)
+static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v10.*/
return 0;
}
-static int gmc_v10_0_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int gmc_v10_0_set_clockgating_state(void *handle,
+static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
+ * is a new problem observed at DF 3.0.3, however with the same suspend sequence not
+ * seen any issue on the DF 3.0.2 series platform.
+ */
+ if (adev->in_s0ix &&
+ amdgpu_ip_version(adev, DF_HWIP, 0) > IP_VERSION(3, 0, 2)) {
+ dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
+ return 0;
+ }
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
return r;
- if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
- adev->asic_type <= CHIP_DIMGREY_CAVEFISH)
+ if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
return athub_v2_1_set_clockgating(adev, state);
else
return athub_v2_0_set_clockgating(adev, state);
}
-static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4))
+ return;
adev->mmhub.funcs->get_clockgating(adev, flags);
- if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
- adev->asic_type <= CHIP_DIMGREY_CAVEFISH)
+ if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
athub_v2_1_get_clockgating(adev, flags);
else
athub_v2_0_get_clockgating(adev, flags);
}
-static int gmc_v10_0_set_powergating_state(void *handle,
+static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1170,14 +1142,12 @@ const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
.resume = gmc_v10_0_resume,
.is_idle = gmc_v10_0_is_idle,
.wait_for_idle = gmc_v10_0_wait_for_idle,
- .soft_reset = gmc_v10_0_soft_reset,
.set_clockgating_state = gmc_v10_0_set_clockgating_state,
.set_powergating_state = gmc_v10_0_set_powergating_state,
.get_clockgating_state = gmc_v10_0_get_clockgating_state,
};
-const struct amdgpu_ip_block_version gmc_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 10,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
new file mode 100644
index 000000000000..ba59ee8e398a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -0,0 +1,1085 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atomfirmware.h"
+#include "gmc_v11_0.h"
+#include "umc_v8_10.h"
+#include "athub/athub_3_0_0_sh_mask.h"
+#include "athub/athub_3_0_0_offset.h"
+#include "dcn/dcn_3_2_0_offset.h"
+#include "dcn/dcn_3_2_0_sh_mask.h"
+#include "oss/osssys_6_0_0_offset.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "navi10_enum.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "nbio_v4_3.h"
+#include "gfxhub_v3_0.h"
+#include "gfxhub_v3_0_3.h"
+#include "gfxhub_v11_5_0.h"
+#include "mmhub_v3_0.h"
+#include "mmhub_v3_0_1.h"
+#include "mmhub_v3_0_2.h"
+#include "mmhub_v3_3.h"
+#include "athub_v3_0.h"
+
+
+static int gmc_v11_0_ecc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int
+gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix && (adev->in_runpm || adev->in_suspend ||
+ amdgpu_in_reset(adev)))
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t vmhub_index = entry->client_id == SOC21_IH_CLIENTID_VMC ?
+ AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
+ return 1;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+ }
+
+ if (printk_ratelimit()) {
+ struct amdgpu_task_info *task_info;
+
+ dev_err(adev->dev,
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ entry->vmid_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
+ addr, entry->client_id);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v11_0_irq_funcs = {
+ .set = gmc_v11_0_vm_fault_interrupt_state,
+ .process = gmc_v11_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v11_0_ecc_funcs = {
+ .set = gmc_v11_0_ecc_interrupt_state,
+ .process = amdgpu_umc_process_ecc_irq,
+};
+
+static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v11_0_irq_funcs;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v11_0_ecc_funcs;
+ }
+}
+
+/**
+ * gmc_v11_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v11_0_get_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;
+
+ return !!(*p_pasid);
+}
+
+/**
+ * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ /* Use register 17 for GART */
+ const unsigned int eng = 17;
+ unsigned char hub_ip;
+ u32 sem, req, ack;
+ unsigned int i;
+ u32 tmp;
+
+ if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
+ return;
+
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
+
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
+
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
+
+ /* Wait for ACK with a delay.*/
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
+
+ /* Issue additional private vm invalidation to MMHUB */
+ if ((vmhub != AMDGPU_GFXHUB(0)) &&
+ (hub->vm_l2_bank_select_reserved_cid2) &&
+ !amdgpu_sriov_vf(adev)) {
+ inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */
+ inv_req |= (1 << 25);
+ /* Issue private invalidation */
+ WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
+ /* Read back to ensure invalidation is done*/
+ RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ }
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v11_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static void gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ uint16_t queried;
+ int vmid, i;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+ bool valid;
+
+ valid = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v11_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v11_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
+ }
+ }
+}
+
+static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+ unsigned int eng = ring->vm_inv_eng;
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0x1, 0x1);
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0);
+
+ return pd_addr;
+}
+
+static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
+}
+
+/*
+ * PTE format:
+ * 63:59 reserved
+ * 58:57 reserved
+ * 56 F
+ * 55 L
+ * 54 reserved
+ * 53:52 SW
+ * 51 T
+ * 50:48 mtype
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ * 54 P
+ * 53:48 reserved
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
+ *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
+ BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+ if (!adev->gmc.translate_further)
+ return;
+
+ if (level == AMDGPU_VM_PDB1) {
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE))
+ *flags |= AMDGPU_PDE_BFS(0x9);
+
+ } else if (level == AMDGPU_VM_PDB0) {
+ if (*flags & AMDGPU_PDE_PTE)
+ *flags &= ~AMDGPU_PDE_PTE;
+ else
+ *flags |= AMDGPU_PTE_TF;
+ }
+}
+
+static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *flags)
+{
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+ break;
+ }
+
+ if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+ *flags |= AMDGPU_PTE_NOALLOC;
+ else
+ *flags &= ~AMDGPU_PTE_NOALLOC;
+
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags |= AMDGPU_PTE_SNOOPED;
+ *flags |= AMDGPU_PTE_LOG;
+ *flags |= AMDGPU_PTE_SYSTEM;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED))
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+}
+
+static unsigned int gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, regD1VGA_CONTROL);
+ unsigned int size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+ u32 viewport;
+ u32 pitch;
+
+ viewport = RREG32_SOC15(DCE, 0, regHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ pitch = RREG32_SOC15(DCE, 0, regHUBPREQ0_DCSURF_SURFACE_PITCH);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
+ 4);
+ }
+
+ return size;
+}
+
+static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v11_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping,
+ .get_vm_pde = gmc_v11_0_get_vm_pde,
+ .get_vm_pte = gmc_v11_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size,
+};
+
+static void gmc_v11_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v11_0_gmc_funcs;
+}
+
+static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 10, 0):
+ adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+ adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM;
+ if (adev->umc.node_inst_num == 4)
+ adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl_ext0[0][0][0];
+ else
+ adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0];
+ adev->umc.ras = &umc_v8_10_ras;
+ break;
+ case IP_VERSION(8, 11, 0):
+ break;
+ default:
+ break;
+ }
+}
+
+
+static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ adev->mmhub.funcs = &mmhub_v3_0_1_funcs;
+ break;
+ case IP_VERSION(3, 0, 2):
+ adev->mmhub.funcs = &mmhub_v3_0_2_funcs;
+ break;
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
+ adev->mmhub.funcs = &mmhub_v3_3_funcs;
+ break;
+ default:
+ adev->mmhub.funcs = &mmhub_v3_0_funcs;
+ break;
+ }
+}
+
+static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 3):
+ adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs;
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs;
+ break;
+ default:
+ adev->gfxhub.funcs = &gfxhub_v3_0_funcs;
+ break;
+ }
+}
+
+static int gmc_v11_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ gmc_v11_0_set_gfxhub_funcs(adev);
+ gmc_v11_0_set_mmhub_funcs(adev);
+ gmc_v11_0_set_gmc_funcs(adev);
+ gmc_v11_0_set_irq_funcs(adev);
+ gmc_v11_0_set_umc_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
+
+ return 0;
+}
+
+static int gmc_v11_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gmc_ras_late_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+}
+
+static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = 0;
+
+ base = adev->mmhub.funcs->get_fb_location(adev);
+
+ amdgpu_gmc_set_agp_default(adev, mc);
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
+ if (!amdgpu_sriov_vf(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) &&
+ (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
+
+ /* base offset of vram pages */
+ if (amdgpu_sriov_vf(adev))
+ adev->vm_manager.vram_base_offset = 0;
+ else
+ adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev);
+}
+
+/**
+ * gmc_v11_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space.
+ * Returns 0 for success.
+ */
+static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* size in MB on si */
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
+ /* In case the PCI BAR is larger than the actual amount of vram */
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1)
+ adev->gmc.gart_size = 512ULL << 20;
+ else
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+
+ gmc_v11_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "PCIE GART already initialized\n");
+ return 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE;
+
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->init(adev);
+
+ adev->gfxhub.funcs->init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ adev->gmc.vram_width = vram_width;
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+
+ /* The mall_size is already calculated as mall_size_per_umc * num_umc.
+ * However, for gfx1151, which features a 2-to-1 UMC mapping,
+ * the result must be multiplied by 2 to determine the actual mall size.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 5, 1):
+ adev->gmc.mall_size *= 2;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
+ default:
+ break;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Set the internal MC address mask This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ if (r) {
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ return r;
+ }
+
+ adev->need_swiotlb = drm_need_swiotlb(44);
+
+ r = gmc_v11_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v11_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
+
+ amdgpu_vm_manager_init(adev);
+
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * gmc_v11_0_gart_fini - vm fini callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tears down the driver GART/VM setup (CIK).
+ */
+static void gmc_v11_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+}
+
+static int gmc_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_vm_manager_fini(adev);
+ gmc_v11_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+}
+
+static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32(hub->vm_contexts_disable, 0);
+ return;
+ }
+}
+
+/**
+ * gmc_v11_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ */
+static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ r = adev->mmhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ /* Flush HDP after it is initialized */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+ adev->mmhub.funcs->set_fault_enable_default(adev, value);
+ gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned int)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ return 0;
+}
+
+static int gmc_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v11_0_init_golden_registers(adev);
+
+ r = gmc_v11_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
+
+ return 0;
+}
+
+/**
+ * gmc_v11_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table.
+ */
+static void gmc_v11_0_gart_disable(struct amdgpu_device *adev)
+{
+ adev->mmhub.funcs->gart_disable(adev);
+}
+
+static int gmc_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
+ gmc_v11_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v11_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ gmc_v11_0_hw_fini(ip_block);
+
+ return 0;
+}
+
+static int gmc_v11_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = gmc_v11_0_hw_init(ip_block);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(ip_block->adev);
+
+ return 0;
+}
+
+static bool gmc_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* MC is always ready in GMC v11.*/
+ return true;
+}
+
+static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* There is no need to wait for MC idle in GMC v11.*/
+ return 0;
+}
+
+static int gmc_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = adev->mmhub.funcs->set_clockgating(adev, state);
+ if (r)
+ return r;
+
+ return athub_v3_0_set_clockgating(adev, state);
+}
+
+static void gmc_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->get_clockgating(adev, flags);
+
+ athub_v3_0_get_clockgating(adev, flags);
+}
+
+static int gmc_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs gmc_v11_0_ip_funcs = {
+ .name = "gmc_v11_0",
+ .early_init = gmc_v11_0_early_init,
+ .sw_init = gmc_v11_0_sw_init,
+ .hw_init = gmc_v11_0_hw_init,
+ .late_init = gmc_v11_0_late_init,
+ .sw_fini = gmc_v11_0_sw_fini,
+ .hw_fini = gmc_v11_0_hw_fini,
+ .suspend = gmc_v11_0_suspend,
+ .resume = gmc_v11_0_resume,
+ .is_idle = gmc_v11_0_is_idle,
+ .wait_for_idle = gmc_v11_0_wait_for_idle,
+ .set_clockgating_state = gmc_v11_0_set_clockgating_state,
+ .set_powergating_state = gmc_v11_0_set_powergating_state,
+ .get_clockgating_state = gmc_v11_0_get_clockgating_state,
+};
+
+const struct amdgpu_ip_block_version gmc_v11_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h
new file mode 100644
index 000000000000..def4d5516f82
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V11_0_H__
+#define __GMC_V11_0_H__
+
+extern const struct amd_ip_funcs gmc_v11_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v11_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
new file mode 100644
index 000000000000..7a9d6894e321
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -0,0 +1,1070 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atomfirmware.h"
+#include "gmc_v12_0.h"
+#include "athub/athub_4_1_0_sh_mask.h"
+#include "athub/athub_4_1_0_offset.h"
+#include "oss/osssys_7_0_0_offset.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "soc24_enum.h"
+#include "soc24.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "nbif_v6_3_1.h"
+#include "gfxhub_v12_0.h"
+#include "mmhub_v4_1_0.h"
+#include "athub_v4_1_0.h"
+#include "umc_v8_14.h"
+
+static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int gmc_v12_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_vmhub *hub;
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (entry->client_id == SOC21_IH_CLIENTID_VMC)
+ hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ else
+ hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
+ return 1;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+ }
+
+ if (printk_ratelimit()) {
+ struct amdgpu_task_info *task_info;
+
+ dev_err(adev->dev,
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ entry->vmid_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
+ addr, entry->client_id);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v12_0_irq_funcs = {
+ .set = gmc_v12_0_vm_fault_interrupt_state,
+ .process = gmc_v12_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v12_0_ecc_funcs = {
+ .set = gmc_v12_0_ecc_interrupt_state,
+ .process = amdgpu_umc_process_ecc_irq,
+};
+
+static void gmc_v12_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v12_0_irq_funcs;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v12_0_ecc_funcs;
+ }
+}
+
+/**
+ * gmc_v12_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v12_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v12_0_get_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;
+
+ return !!(*p_pasid);
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+static void gmc_v12_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
+ unsigned int vmhub, uint32_t flush_type)
+{
+ bool use_semaphore = gmc_v12_0_use_invalidate_semaphore(adev, vmhub);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 tmp;
+ /* Use register 17 for GART */
+ const unsigned eng = 17;
+ unsigned int i;
+ unsigned char hub_ip = 0;
+
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
+ GC_HWIP : MMHUB_HWIP;
+
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, hub_ip);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev,
+ "Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
+
+ /* Wait for ACK with a delay.*/
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng, hub_ip);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0, hub_ip);
+
+ /* Issue additional private vm invalidation to MMHUB */
+ if ((vmhub != AMDGPU_GFXHUB(0)) &&
+ (hub->vm_l2_bank_select_reserved_cid2) &&
+ !amdgpu_sriov_vf(adev)) {
+ inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */
+ inv_req |= (1 << 25);
+ /* Issue private invalidation */
+ WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
+ /* Read back to ensure invalidation is done*/
+ RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ }
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (i < adev->usec_timeout)
+ return;
+
+ dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v12_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
+ return;
+
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ const unsigned eng = 17;
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
+
+ gmc_v12_0_flush_vm_hub(adev, vmid, vmhub, 0);
+ return;
+}
+
+/**
+ * gmc_v12_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ uint16_t queried;
+ int vmid, i;
+
+ if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x84) {
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ input.hub_id = AMDGPU_GFXHUB(0);
+ /* MES will invalidate all gc_hub for the device from master */
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ if (all_hub) {
+ /* Only need to invalidate mm_hub now, gfx12 only support one mmhub */
+ input.hub_id = AMDGPU_MMHUB0(0);
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ return;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+ bool valid;
+
+ valid = gmc_v12_0_get_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v12_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v12_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
+ }
+ }
+}
+
+static uint64_t gmc_v12_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ bool use_semaphore = gmc_v12_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+ unsigned eng = ring->vm_inv_eng;
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0x1, 0x1);
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0);
+
+ return pd_addr;
+}
+
+static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
+}
+
+/*
+ * PTE format:
+ * 63 P
+ * 62:59 reserved
+ * 58 D
+ * 57 G
+ * 56 T
+ * 55:54 M
+ * 53:52 SW
+ * 51:48 reserved for future
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format:
+ * 63 P
+ * 62:58 block fragment size
+ * 57 reserved
+ * 56 A
+ * 55:54 M
+ * 53:52 reserved
+ * 51:48 reserved for future
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM))
+ *addr = adev->vm_manager.vram_base_offset + *addr -
+ adev->gmc.vram_start;
+ BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+ if (!adev->gmc.translate_further)
+ return;
+
+ if (level == AMDGPU_VM_PDB1) {
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE_GFX12))
+ *flags |= AMDGPU_PDE_BFS_GFX12(0x9);
+
+ } else if (level == AMDGPU_VM_PDB0) {
+ if (*flags & AMDGPU_PDE_PTE_GFX12)
+ *flags &= ~AMDGPU_PDE_PTE_GFX12;
+ }
+}
+
+static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *flags)
+{
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
+ break;
+ }
+
+ if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+ *flags |= AMDGPU_PTE_NOALLOC;
+ else
+ *flags &= ~AMDGPU_PTE_NOALLOC;
+
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
+ *flags |= AMDGPU_PTE_PRT_GFX12;
+ *flags |= AMDGPU_PTE_SNOOPED;
+ *flags |= AMDGPU_PTE_SYSTEM;
+ *flags |= AMDGPU_PTE_IS_PTE;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+ *flags |= AMDGPU_PTE_DCC;
+
+ if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
+}
+
+static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
+{
+ unsigned int max_tex_channel_caches, alignment;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1))
+ return 0;
+
+ max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches;
+ if (is_power_of_2(max_tex_channel_caches))
+ alignment = (unsigned int)(max_tex_channel_caches / SZ_4);
+ else
+ alignment = roundup_pow_of_two(max_tex_channel_caches);
+
+ return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K);
+}
+
+static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v12_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v12_0_emit_pasid_mapping,
+ .get_vm_pde = gmc_v12_0_get_vm_pde,
+ .get_vm_pte = gmc_v12_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
+ .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
+};
+
+static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v12_0_gmc_funcs;
+}
+
+static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 14, 0):
+ adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_14_UMC_INSTANCE_NUM(adev);
+ adev->umc.node_inst_num = 0;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET;
+ adev->umc.ras = &umc_v8_14_ras;
+ break;
+ default:
+ break;
+ }
+}
+
+
+static void gmc_v12_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ adev->mmhub.funcs = &mmhub_v4_1_0_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static void gmc_v12_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->gfxhub.funcs = &gfxhub_v12_0_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ gmc_v12_0_set_gfxhub_funcs(adev);
+ gmc_v12_0_set_mmhub_funcs(adev);
+ gmc_v12_0_set_gmc_funcs(adev);
+ gmc_v12_0_set_irq_funcs(adev);
+ gmc_v12_0_set_umc_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+
+ return 0;
+}
+
+static int gmc_v12_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gmc_ras_late_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+}
+
+static void gmc_v12_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = 0;
+
+ base = adev->mmhub.funcs->get_fb_location(adev);
+
+ amdgpu_gmc_set_agp_default(adev, mc);
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
+
+ /* base offset of vram pages */
+ if (amdgpu_sriov_vf(adev))
+ adev->vm_manager.vram_base_offset = 0;
+ else
+ adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev);
+}
+
+/**
+ * gmc_v12_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space.
+ * Returns 0 for success.
+ */
+static int gmc_v12_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* size in MB on si */
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
+ /* In case the PCI BAR is larger than the actual amount of vram */
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ adev->gmc.gart_size = 512ULL << 20;
+ } else
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+
+ gmc_v12_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+static int gmc_v12_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "PCIE GART already initialized\n");
+ return 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE |
+ AMDGPU_PTE_IS_PTE;
+
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->init(adev);
+
+ adev->gfxhub.funcs->init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ adev->gmc.vram_width = vram_width;
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
+ default:
+ break;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Set the internal MC address mask This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ if (r) {
+ printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ return r;
+ }
+
+ adev->need_swiotlb = drm_need_swiotlb(44);
+
+ r = gmc_v12_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v12_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
+
+ amdgpu_vm_manager_init(adev);
+
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * gmc_v12_0_gart_fini - vm fini callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tears down the driver GART/VM setup (CIK).
+ */
+static void gmc_v12_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+}
+
+static int gmc_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_vm_manager_fini(adev);
+ gmc_v12_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+}
+
+static void gmc_v12_0_init_golden_registers(struct amdgpu_device *adev)
+{
+}
+
+/**
+ * gmc_v12_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ */
+static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ r = adev->mmhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ /* Flush HDP after it is initialized */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+ adev->mmhub.funcs->set_fault_enable_default(adev, value);
+ gmc_v12_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+
+ dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ return 0;
+}
+
+static int gmc_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v12_0_init_golden_registers(adev);
+
+ r = gmc_v12_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
+
+ return 0;
+}
+
+/**
+ * gmc_v12_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table.
+ */
+static void gmc_v12_0_gart_disable(struct amdgpu_device *adev)
+{
+ adev->mmhub.funcs->gart_disable(adev);
+}
+
+static int gmc_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
+ gmc_v12_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ gmc_v12_0_hw_fini(ip_block);
+
+ return 0;
+}
+
+static int gmc_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = gmc_v12_0_hw_init(ip_block);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(ip_block->adev);
+
+ return 0;
+}
+
+static bool gmc_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* MC is always ready in GMC v11.*/
+ return true;
+}
+
+static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* There is no need to wait for MC idle in GMC v11.*/
+ return 0;
+}
+
+static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = adev->mmhub.funcs->set_clockgating(adev, state);
+ if (r)
+ return r;
+
+ return athub_v4_1_0_set_clockgating(adev, state);
+}
+
+static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->get_clockgating(adev, flags);
+
+ athub_v4_1_0_get_clockgating(adev, flags);
+}
+
+static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs gmc_v12_0_ip_funcs = {
+ .name = "gmc_v12_0",
+ .early_init = gmc_v12_0_early_init,
+ .sw_init = gmc_v12_0_sw_init,
+ .hw_init = gmc_v12_0_hw_init,
+ .late_init = gmc_v12_0_late_init,
+ .sw_fini = gmc_v12_0_sw_fini,
+ .hw_fini = gmc_v12_0_hw_fini,
+ .suspend = gmc_v12_0_suspend,
+ .resume = gmc_v12_0_resume,
+ .is_idle = gmc_v12_0_is_idle,
+ .wait_for_idle = gmc_v12_0_wait_for_idle,
+ .set_clockgating_state = gmc_v12_0_set_clockgating_state,
+ .set_powergating_state = gmc_v12_0_set_powergating_state,
+ .get_clockgating_state = gmc_v12_0_get_clockgating_state,
+};
+
+const struct amdgpu_ip_block_version gmc_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h
new file mode 100644
index 000000000000..deca93e4a156
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V12_0_H__
+#define __GMC_V12_0_H__
+
+extern const struct amd_ip_funcs gmc_v12_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v12_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index f5b69484c45a..a8ec95f42926 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -43,7 +43,7 @@
static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v6_0_wait_for_idle(void *handle);
+static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/tahiti_mc.bin");
MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin");
@@ -64,8 +64,13 @@ MODULE_FIRMWARE("amdgpu/si58_mc.bin");
static void gmc_v6_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
+ struct amdgpu_ip_block *ip_block;
- gmc_v6_0_wait_for_idle((void *)adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v6_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -98,9 +103,7 @@ static void gmc_v6_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
- bool is_58_fw = false;
DRM_DEBUG("\n");
@@ -120,30 +123,21 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
case CHIP_HAINAN:
chip_name = "hainan";
break;
- default: BUG();
+ default:
+ BUG();
}
/* this memory configuration requires special firmware */
if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
- is_58_fw = true;
-
- if (is_58_fw)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin");
- else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = amdgpu_ucode_validate(adev->gmc.fw);
+ chip_name = "si58";
-out:
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
dev_err(adev->dev,
- "si_mc: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ "si_mc: Failed to load firmware \"%s_mc.bin\"\n",
+ chip_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
}
@@ -185,9 +179,8 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
}
/* load the MC ucode */
- for (i = 0; i < ucode_size; i++) {
+ for (i = 0; i < ucode_size; i++)
WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
- }
/* put the engine back into the active state */
WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
@@ -215,15 +208,19 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW);
}
static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
{
int i, j;
+ struct amdgpu_ip_block *ip_block;
+
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x6) {
@@ -235,9 +232,12 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v6_0_wait_for_idle((void *)adev)) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
if (adev->mode_info.num_crtc) {
u32 tmp;
@@ -249,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
/* disable VGA render */
tmp = RREG32(mmVGA_RENDER_CONTROL);
- tmp &= ~VGA_VSTATUS_CNTL;
+ tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK;
WREG32(mmVGA_RENDER_CONTROL, tmp);
}
/* Update configuration */
@@ -258,14 +258,13 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
- adev->vram_scratch.gpu_addr >> 12);
+ adev->mem_scratch.gpu_addr >> 12);
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
- if (gmc_v6_0_wait_for_idle((void *)adev)) {
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
}
static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
@@ -276,13 +275,13 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
int r;
tmp = RREG32(mmMC_ARB_RAMCFG);
- if (tmp & (1 << 11)) {
+ if (tmp & (1 << 11))
chansize = 16;
- } else if (tmp & MC_ARB_RAMCFG__CHANSIZE_MASK) {
+ else if (tmp & MC_ARB_RAMCFG__CHANSIZE_MASK)
chansize = 64;
- } else {
+ else
chansize = 32;
- }
+
tmp = RREG32(mmMC_SHARED_CHMAP);
switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) {
case 0:
@@ -346,6 +345,7 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
+ adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
gmc_v6_0_vram_gtt_location(adev, &adev->gmc);
return 0;
@@ -358,7 +358,7 @@ static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
uint32_t reg;
@@ -382,7 +382,9 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
*flags &= ~AMDGPU_PTE_EXECUTABLE;
@@ -411,11 +413,11 @@ static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
}
/**
- + * gmc_v8_0_set_prt - set PRT VM fault
- + *
- + * @adev: amdgpu_device pointer
- + * @enable: enable/disable VM fault handling for PRT
- +*/
+ * gmc_v8_0_set_prt() - set PRT VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable VM fault handling for PRT
+ */
static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
{
u32 tmp;
@@ -441,9 +443,10 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -468,16 +471,14 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
{
uint64_t table_addr;
- int r, i;
u32 field;
+ int i;
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
@@ -555,9 +556,8 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
- adev->gart.ready = true;
return 0;
}
@@ -607,40 +607,36 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL3,
VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
(0UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
- amdgpu_gart_table_vram_unpin(adev);
}
static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
- u32 status, u32 addr, u32 mc_client)
+ u32 status, u32 addr)
{
u32 mc_id;
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
- char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
- (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
- dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from %d\n",
protections, vmid, addr,
REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_RW) ?
- "write" : "read", block, mc_client, mc_id);
+ "write" : "read", mc_id);
}
-/*
static const u32 mc_cg_registers[] = {
- MC_HUB_MISC_HUB_CG,
- MC_HUB_MISC_SIP_CG,
- MC_HUB_MISC_VM_CG,
- MC_XPB_CLK_GAT,
- ATC_MISC_CG,
- MC_CITF_MISC_WR_CG,
- MC_CITF_MISC_RD_CG,
- MC_CITF_MISC_VM_CG,
- VM_L2_CG,
+ mmMC_HUB_MISC_HUB_CG,
+ mmMC_HUB_MISC_SIP_CG,
+ mmMC_HUB_MISC_VM_CG,
+ mmMC_XPB_CLK_GAT,
+ mmATC_MISC_CG,
+ mmMC_CITF_MISC_WR_CG,
+ mmMC_CITF_MISC_RD_CG,
+ mmMC_CITF_MISC_VM_CG,
+ mmVM_L2_CG,
};
static const u32 mc_cg_ls_en[] = {
@@ -675,7 +671,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
data |= mc_cg_ls_en[i];
else
data &= ~mc_cg_ls_en[i];
@@ -692,7 +688,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
data |= mc_cg_en[i];
else
data &= ~mc_cg_en[i];
@@ -708,7 +704,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev,
orig = data = RREG32_PCIE(ixPCIE_CNTL2);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
@@ -731,7 +727,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
else
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
@@ -747,7 +743,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_MEM_POWER_LS);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
else
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
@@ -755,7 +751,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
if (orig != data)
WREG32(mmHDP_MEM_POWER_LS, data);
}
-*/
static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
{
@@ -777,9 +772,9 @@ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v6_0_early_init(void *handle)
+static int gmc_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v6_0_set_gmc_funcs(adev);
gmc_v6_0_set_irq_funcs(adev);
@@ -787,9 +782,9 @@ static int gmc_v6_0_early_init(void *handle)
return 0;
}
-static int gmc_v6_0_late_init(void *handle)
+static int gmc_v6_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -797,15 +792,16 @@ static int gmc_v6_0_late_init(void *handle)
return 0;
}
-static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
@@ -813,17 +809,18 @@ static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static int gmc_v6_0_sw_init(void *handle)
+static int gmc_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
u32 tmp = RREG32(mmMC_SEQ_MISC0);
+
tmp &= MC_SEQ_MISC0__MT__MASK;
adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp);
}
@@ -840,12 +837,12 @@ static int gmc_v6_0_sw_init(void *handle)
adev->gmc.mc_mask = 0xffffffffffULL;
- r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
if (r) {
dev_warn(adev->dev, "No suitable DMA available.\n");
return r;
}
- adev->need_swiotlb = drm_need_swiotlb(44);
+ adev->need_swiotlb = drm_need_swiotlb(40);
r = gmc_v6_0_init_microcode(adev);
if (r) {
@@ -889,25 +886,23 @@ static int gmc_v6_0_sw_init(void *handle)
return 0;
}
-static int gmc_v6_0_sw_fini(void *handle)
+static int gmc_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
- amdgpu_gart_fini(adev);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ amdgpu_ucode_release(&adev->gmc.fw);
return 0;
}
-static int gmc_v6_0_hw_init(void *handle)
+static int gmc_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v6_0_mc_program(adev);
@@ -923,12 +918,15 @@ static int gmc_v6_0_hw_init(void *handle)
if (r)
return r;
- return r;
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+
+ return 0;
}
-static int gmc_v6_0_hw_fini(void *handle)
+static int gmc_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v6_0_gart_disable(adev);
@@ -936,21 +934,19 @@ static int gmc_v6_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v6_0_suspend(void *handle)
+static int gmc_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v6_0_hw_fini(adev);
+ gmc_v6_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v6_0_resume(void *handle)
+static int gmc_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = gmc_v6_0_hw_init(adev);
+ r = gmc_v6_0_hw_init(ip_block);
if (r)
return r;
@@ -959,9 +955,10 @@ static int gmc_v6_0_resume(void *handle)
return 0;
}
-static bool gmc_v6_0_is_idle(void *handle)
+static bool gmc_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -971,13 +968,13 @@ static bool gmc_v6_0_is_idle(void *handle)
return true;
}
-static int gmc_v6_0_wait_for_idle(void *handle)
+static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int i;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gmc_v6_0_is_idle(handle))
+ if (gmc_v6_0_is_idle(ip_block))
return 0;
udelay(1);
}
@@ -985,9 +982,10 @@ static int gmc_v6_0_wait_for_idle(void *handle)
}
-static int gmc_v6_0_soft_reset(void *handle)
+static int gmc_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1004,10 +1002,9 @@ static int gmc_v6_0_soft_reset(void *handle)
if (srbm_soft_reset) {
gmc_v6_0_mc_stop(adev);
- if (gmc_v6_0_wait_for_idle(adev)) {
- dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
- }
+ if (gmc_v6_0_wait_for_idle(ip_block))
+ dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
tmp = RREG32(mmSRBM_SOFT_RESET);
tmp |= srbm_soft_reset;
@@ -1032,7 +1029,7 @@ static int gmc_v6_0_soft_reset(void *handle)
static int gmc_v6_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
u32 tmp;
@@ -1073,6 +1070,12 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status;
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1);
@@ -1080,6 +1083,10 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT,
+ status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v6_0_set_fault_enable_default(adev, false);
@@ -1090,19 +1097,33 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
- gmc_v6_0_vm_decode_fault(adev, status, addr, 0);
+ gmc_v6_0_vm_decode_fault(adev, status, addr);
}
return 0;
}
-static int gmc_v6_0_set_clockgating_state(void *handle,
+static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool gate = false;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ gmc_v6_0_enable_mc_mgcg(adev, gate);
+ gmc_v6_0_enable_mc_ls(adev, gate);
+ }
+ gmc_v6_0_enable_bif_mgls(adev, gate);
+ gmc_v6_0_enable_hdp_mgcg(adev, gate);
+ gmc_v6_0_enable_hdp_ls(adev, gate);
+
return 0;
}
-static int gmc_v6_0_set_powergating_state(void *handle,
+static int gmc_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1150,8 +1171,7 @@ static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
}
-const struct amdgpu_ip_block_version gmc_v6_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v6_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 6,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index dee2b34effb6..fbd0bf147f50 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -52,22 +52,20 @@
static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v7_0_wait_for_idle(void *handle);
+static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/bonaire_mc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_mc.bin");
MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
-static const u32 golden_settings_iceland_a11[] =
-{
+static const u32 golden_settings_iceland_a11[] = {
mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
};
-static const u32 iceland_mgcg_cgcg_init[] =
-{
+static const u32 iceland_mgcg_cgcg_init[] = {
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
@@ -89,9 +87,14 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
static void gmc_v7_0_mc_stop(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 blackout;
- gmc_v7_0_wait_for_idle((void *)adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v7_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -132,7 +135,6 @@ static void gmc_v7_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -151,21 +153,15 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
case CHIP_KABINI:
case CHIP_MULLINS:
return 0;
- default: BUG();
+ default:
+ return -EINVAL;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
-
- err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gmc.fw);
-
-out:
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
- pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
}
@@ -243,10 +239,12 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
}
/**
@@ -259,9 +257,14 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 tmp;
int i, j;
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x6) {
WREG32((0xb05 + j), 0x00000000);
@@ -272,9 +275,9 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v7_0_wait_for_idle((void *)adev)) {
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
+
if (adev->mode_info.num_crtc) {
/* Lockout access through VGA aperture*/
tmp = RREG32(mmVGA_HDP_CONTROL);
@@ -292,13 +295,12 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
- adev->vram_scratch.gpu_addr >> 12);
+ adev->mem_scratch.gpu_addr >> 12);
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
- if (gmc_v7_0_wait_for_idle((void *)adev)) {
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
@@ -330,11 +332,11 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
/* Get VRAM informations */
tmp = RREG32(mmMC_ARB_RAMCFG);
- if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) {
+ if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE))
chansize = 64;
- } else {
+ else
chansize = 32;
- }
+
tmp = RREG32(mmMC_SHARED_CHMAP);
switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
case 0:
@@ -381,17 +383,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU &&
- adev->gmc.real_vram_size > adev->gmc.aper_size) {
+ if ((adev->flags & AMD_IS_APU) &&
+ adev->gmc.real_vram_size > adev->gmc.aper_size &&
+ !amdgpu_passthrough(adev)) {
adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
- /* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
- if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
- adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
@@ -414,6 +414,7 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
+ adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
gmc_v7_0_vram_gtt_location(adev, &adev->gmc);
return 0;
@@ -426,31 +427,27 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
* @pasid: pasid to be flush
* @flush_type: type of flush
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ u32 mask = 0x0;
int vmid;
- unsigned int tmp;
-
- if (amdgpu_in_reset(adev))
- return -EIO;
for (vmid = 1; vmid < 16; vmid++) {
+ u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
+ mask |= 1 << vmid;
}
- return 0;
+ WREG32(mmVM_INVALIDATE_REQUEST, mask);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
}
/*
@@ -478,7 +475,7 @@ static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
uint32_t reg;
@@ -494,8 +491,8 @@ static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
- unsigned pasid)
+static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
{
amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
}
@@ -507,7 +504,9 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
*flags &= ~AMDGPU_PTE_EXECUTABLE;
@@ -515,7 +514,7 @@ static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
}
/**
- * gmc_v8_0_set_fault_enable_default - update VM fault handling
+ * gmc_v7_0_set_fault_enable_default - update VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
@@ -574,9 +573,10 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -612,17 +612,14 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
{
uint64_t table_addr;
- int r, i;
u32 tmp, field;
+ int i;
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
-
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
/* Setup TLB control */
@@ -709,9 +706,8 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
- adev->gart.ready = true;
return 0;
}
@@ -757,7 +753,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(mmVM_L2_CNTL, tmp);
WREG32(mmVM_L2_CNTL2, 0);
- amdgpu_gart_table_vram_unpin(adev);
}
/**
@@ -772,7 +767,7 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
* Print human readable fault information (CIK).
*/
static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
- u32 addr, u32 mc_client, unsigned pasid)
+ u32 addr, u32 mc_client, unsigned int pasid)
{
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -939,9 +934,9 @@ static int gmc_v7_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v7_0_early_init(void *handle)
+static int gmc_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v7_0_set_gmc_funcs(adev);
gmc_v7_0_set_irq_funcs(adev);
@@ -953,13 +948,14 @@ static int gmc_v7_0_early_init(void *handle)
adev->gmc.shared_aperture_end + 1;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
-static int gmc_v7_0_late_init(void *handle)
+static int gmc_v7_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -967,15 +963,16 @@ static int gmc_v7_0_late_init(void *handle)
return 0;
}
-static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
@@ -984,17 +981,18 @@ static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static int gmc_v7_0_sw_init(void *handle)
+static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
u32 tmp = RREG32(mmMC_SEQ_MISC0);
+
tmp &= MC_SEQ_MISC0__MT__MASK;
adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);
}
@@ -1070,31 +1068,29 @@ static int gmc_v7_0_sw_init(void *handle)
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
return 0;
}
-static int gmc_v7_0_sw_fini(void *handle)
+static int gmc_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
- amdgpu_gart_fini(adev);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ amdgpu_ucode_release(&adev->gmc.fw);
return 0;
}
-static int gmc_v7_0_hw_init(void *handle)
+static int gmc_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v7_0_init_golden_registers(adev);
@@ -1112,12 +1108,15 @@ static int gmc_v7_0_hw_init(void *handle)
if (r)
return r;
- return r;
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+
+ return 0;
}
-static int gmc_v7_0_hw_fini(void *handle)
+static int gmc_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v7_0_gart_disable(adev);
@@ -1125,32 +1124,29 @@ static int gmc_v7_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v7_0_suspend(void *handle)
+static int gmc_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v7_0_hw_fini(adev);
+ gmc_v7_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v7_0_resume(void *handle)
+static int gmc_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v7_0_hw_init(adev);
+ r = gmc_v7_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v7_0_is_idle(void *handle)
+static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -1160,20 +1156,13 @@ static bool gmc_v7_0_is_idle(void *handle)
return true;
}
-static int gmc_v7_0_wait_for_idle(void *handle)
+static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
- u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int i;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- /* read MC_STATUS */
- tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
- SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
- SRBM_STATUS__MCC_BUSY_MASK |
- SRBM_STATUS__MCD_BUSY_MASK |
- SRBM_STATUS__VMC_BUSY_MASK);
- if (!tmp)
+ if (gmc_v7_0_is_idle(ip_block))
return 0;
udelay(1);
}
@@ -1181,9 +1170,9 @@ static int gmc_v7_0_wait_for_idle(void *handle)
}
-static int gmc_v7_0_soft_reset(void *handle)
+static int gmc_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1200,10 +1189,8 @@ static int gmc_v7_0_soft_reset(void *handle)
if (srbm_soft_reset) {
gmc_v7_0_mc_stop(adev);
- if (gmc_v7_0_wait_for_idle((void *)adev)) {
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
- }
-
tmp = RREG32(mmSRBM_SOFT_RESET);
tmp |= srbm_soft_reset;
@@ -1229,7 +1216,7 @@ static int gmc_v7_0_soft_reset(void *handle)
static int gmc_v7_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
u32 tmp;
@@ -1274,6 +1261,12 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status, mc_client, vmid;
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
@@ -1283,6 +1276,9 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v7_0_set_fault_enable_default(adev, false);
@@ -1300,7 +1296,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
- && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1316,18 +1312,17 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
}
return 0;
}
-static int gmc_v7_0_set_clockgating_state(void *handle,
+static int gmc_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1343,7 +1338,7 @@ static int gmc_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v7_0_set_powergating_state(void *handle,
+static int gmc_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1393,8 +1388,7 @@ static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
}
-const struct amdgpu_ip_block_version gmc_v7_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v7_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 7,
.minor = 0,
@@ -1402,8 +1396,7 @@ const struct amdgpu_ip_block_version gmc_v7_0_ip_block =
.funcs = &gmc_v7_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gmc_v7_4_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v7_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 7,
.minor = 4,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 2d832fc23119..6551b60f2584 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -53,18 +53,18 @@
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v8_0_wait_for_idle(void *handle);
+static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_32_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin");
-static const u32 golden_settings_tonga_a11[] =
-{
+static const u32 golden_settings_tonga_a11[] = {
mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000,
mmMC_HUB_RDREQ_DMIF_LIMIT, 0x0000007f, 0x00000028,
mmMC_HUB_WDP_UMC, 0x00007fb6, 0x00000991,
@@ -74,34 +74,29 @@ static const u32 golden_settings_tonga_a11[] =
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
};
-static const u32 tonga_mgcg_cgcg_init[] =
-{
+static const u32 tonga_mgcg_cgcg_init[] = {
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
-static const u32 golden_settings_fiji_a10[] =
-{
+static const u32 golden_settings_fiji_a10[] = {
mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
};
-static const u32 fiji_mgcg_cgcg_init[] =
-{
+static const u32 fiji_mgcg_cgcg_init[] = {
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
-static const u32 golden_settings_polaris11_a11[] =
-{
+static const u32 golden_settings_polaris11_a11[] = {
mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
};
-static const u32 golden_settings_polaris10_a11[] =
-{
+static const u32 golden_settings_polaris10_a11[] = {
mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000,
mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
@@ -109,19 +104,16 @@ static const u32 golden_settings_polaris10_a11[] =
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
};
-static const u32 cz_mgcg_cgcg_init[] =
-{
+static const u32 cz_mgcg_cgcg_init[] = {
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
-static const u32 stoney_mgcg_cgcg_init[] =
-{
+static const u32 stoney_mgcg_cgcg_init[] = {
mmATC_MISC_CG, 0xffffffff, 0x000c0200,
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
-static const u32 golden_settings_stoney_common[] =
-{
+static const u32 golden_settings_stoney_common[] = {
mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004,
mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000
};
@@ -178,8 +170,13 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
static void gmc_v8_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
+ struct amdgpu_ip_block *ip_block;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
- gmc_v8_0_wait_for_idle(adev);
+ gmc_v8_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -220,7 +217,6 @@ static void gmc_v8_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -243,30 +239,31 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
chip_name = "polaris10";
break;
case CHIP_POLARIS12:
- if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision))
+ if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
chip_name = "polaris12_k";
- else
- chip_name = "polaris12";
+ } else {
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, ixMC_IO_DEBUG_UP_159);
+ /* Polaris12 32bit ASIC needs a special MC firmware */
+ if (RREG32(mmMC_SEQ_IO_DEBUG_DATA) == 0x05b4dc40)
+ chip_name = "polaris12_32";
+ else
+ chip_name = "polaris12";
+ }
break;
case CHIP_FIJI:
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_VEGAM:
return 0;
- default: BUG();
+ default:
+ return -EINVAL;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gmc.fw);
-
-out:
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
- pr_err("mc: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
}
@@ -420,8 +417,9 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
}
/**
@@ -434,6 +432,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 tmp;
int i, j;
@@ -447,9 +446,13 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v8_0_wait_for_idle((void *)adev)) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
+
if (adev->mode_info.num_crtc) {
/* Lockout access through VGA aperture*/
tmp = RREG32(mmVGA_HDP_CONTROL);
@@ -467,7 +470,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
- adev->vram_scratch.gpu_addr >> 12);
+ adev->mem_scratch.gpu_addr >> 12);
if (amdgpu_sriov_vf(adev)) {
tmp = ((adev->gmc.vram_end >> 24) & 0xFFFF) << 16;
@@ -480,11 +483,10 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
- if (gmc_v8_0_wait_for_idle((void *)adev)) {
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
@@ -508,19 +510,19 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
{
int r;
+ u32 tmp;
adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
if (!adev->gmc.vram_width) {
- u32 tmp;
int chansize, numchan;
/* Get VRAM informations */
tmp = RREG32(mmMC_ARB_RAMCFG);
- if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) {
+ if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE))
chansize = 64;
- } else {
+ else
chansize = 32;
- }
+
tmp = RREG32(mmMC_SHARED_CHMAP);
switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
case 0:
@@ -555,8 +557,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
adev->gmc.vram_width = numchan * chansize;
}
/* size in MB on si */
- adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
- adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ tmp = RREG32(mmCONFIG_MEMSIZE);
+ /* some boards may have garbage in the upper 16 bits */
+ if (tmp & 0xffff0000) {
+ DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
+ if (tmp & 0xffff)
+ tmp &= 0xffff;
+ }
+ adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
@@ -567,16 +576,13 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU) {
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
- /* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
- if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
- adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
@@ -599,6 +605,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
+ adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
gmc_v8_0_vram_gtt_location(adev, &adev->gmc);
return 0;
@@ -611,32 +618,27 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* @pasid: pasid to be flush
* @flush_type: type of flush
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ u32 mask = 0x0;
int vmid;
- unsigned int tmp;
-
- if (amdgpu_in_reset(adev))
- return -EIO;
for (vmid = 1; vmid < 16; vmid++) {
+ u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
+ mask |= 1 << vmid;
}
- return 0;
-
+ WREG32(mmVM_INVALIDATE_REQUEST, mask);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
}
/*
@@ -664,7 +666,7 @@ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
uint32_t reg;
@@ -680,8 +682,8 @@ static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
- unsigned pasid)
+static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
{
amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
}
@@ -714,11 +716,15 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
*flags &= ~AMDGPU_PTE_PRT;
}
@@ -752,11 +758,11 @@ static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev,
}
/**
- * gmc_v8_0_set_prt - set PRT VM fault
+ * gmc_v8_0_set_prt() - set PRT VM fault
*
* @adev: amdgpu_device pointer
* @enable: enable/disable VM fault handling for PRT
-*/
+ */
static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
{
u32 tmp;
@@ -784,9 +790,10 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -822,17 +829,14 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
{
uint64_t table_addr;
- int r, i;
u32 tmp, field;
+ int i;
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
-
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
/* Setup TLB control */
@@ -936,9 +940,8 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
- adev->gart.ready = true;
return 0;
}
@@ -984,7 +987,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(mmVM_L2_CNTL, tmp);
WREG32(mmVM_L2_CNTL2, 0);
- amdgpu_gart_table_vram_unpin(adev);
}
/**
@@ -999,7 +1001,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
* Print human readable fault information (VI).
*/
static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
- u32 addr, u32 mc_client, unsigned pasid)
+ u32 addr, u32 mc_client, unsigned int pasid)
{
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1040,9 +1042,9 @@ static int gmc_v8_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v8_0_early_init(void *handle)
+static int gmc_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v8_0_set_gmc_funcs(adev);
gmc_v8_0_set_irq_funcs(adev);
@@ -1054,13 +1056,14 @@ static int gmc_v8_0_early_init(void *handle)
adev->gmc.shared_aperture_end + 1;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
-static int gmc_v8_0_late_init(void *handle)
+static int gmc_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -1068,15 +1071,16 @@ static int gmc_v8_0_late_init(void *handle)
return 0;
}
-static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
@@ -1087,12 +1091,12 @@ static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
#define mmMC_SEQ_MISC0_FIJI 0xA71
-static int gmc_v8_0_sw_init(void *handle)
+static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
@@ -1179,31 +1183,29 @@ static int gmc_v8_0_sw_init(void *handle)
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
return 0;
}
-static int gmc_v8_0_sw_fini(void *handle)
+static int gmc_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
- amdgpu_gart_fini(adev);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ amdgpu_ucode_release(&adev->gmc.fw);
return 0;
}
-static int gmc_v8_0_hw_init(void *handle)
+static int gmc_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v8_0_init_golden_registers(adev);
@@ -1229,12 +1231,15 @@ static int gmc_v8_0_hw_init(void *handle)
if (r)
return r;
- return r;
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+
+ return 0;
}
-static int gmc_v8_0_hw_fini(void *handle)
+static int gmc_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v8_0_gart_disable(adev);
@@ -1242,32 +1247,29 @@ static int gmc_v8_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v8_0_suspend(void *handle)
+static int gmc_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v8_0_hw_fini(adev);
+ gmc_v8_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v8_0_resume(void *handle)
+static int gmc_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v8_0_hw_init(adev);
+ r = gmc_v8_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v8_0_is_idle(void *handle)
+static bool gmc_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -1277,11 +1279,11 @@ static bool gmc_v8_0_is_idle(void *handle)
return true;
}
-static int gmc_v8_0_wait_for_idle(void *handle)
+static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
+ unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -1299,10 +1301,10 @@ static int gmc_v8_0_wait_for_idle(void *handle)
}
-static bool gmc_v8_0_check_soft_reset(void *handle)
+static bool gmc_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
@@ -1315,33 +1317,34 @@ static bool gmc_v8_0_check_soft_reset(void *handle)
srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
}
+
if (srbm_soft_reset) {
adev->gmc.srbm_soft_reset = srbm_soft_reset;
return true;
- } else {
- adev->gmc.srbm_soft_reset = 0;
- return false;
}
+
+ adev->gmc.srbm_soft_reset = 0;
+
+ return false;
}
-static int gmc_v8_0_pre_soft_reset(void *handle)
+static int gmc_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->gmc.srbm_soft_reset)
return 0;
gmc_v8_0_mc_stop(adev);
- if (gmc_v8_0_wait_for_idle(adev)) {
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
- }
return 0;
}
-static int gmc_v8_0_soft_reset(void *handle)
+static int gmc_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->gmc.srbm_soft_reset)
@@ -1370,9 +1373,9 @@ static int gmc_v8_0_soft_reset(void *handle)
return 0;
}
-static int gmc_v8_0_post_soft_reset(void *handle)
+static int gmc_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->gmc.srbm_soft_reset)
return 0;
@@ -1383,7 +1386,7 @@ static int gmc_v8_0_post_soft_reset(void *handle)
static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
u32 tmp;
@@ -1436,6 +1439,12 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
@@ -1445,22 +1454,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) {
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
+
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+ entry->src_id, entry->src_data[0]);
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
- dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
- entry->src_id, entry->src_data[0], task_info.process_name,
- task_info.tgid, task_info.task_name, task_info.pid);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- addr);
+ addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
+
gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client,
entry->pasid);
}
@@ -1468,7 +1484,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
- && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1484,8 +1500,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
}
return 0;
@@ -1651,10 +1666,10 @@ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev,
}
}
-static int gmc_v8_0_set_clockgating_state(void *handle,
+static int gmc_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1672,15 +1687,15 @@ static int gmc_v8_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v8_0_set_powergating_state(void *handle,
+static int gmc_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1744,8 +1759,7 @@ static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
}
-const struct amdgpu_ip_block_version gmc_v8_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v8_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 8,
.minor = 0,
@@ -1753,8 +1767,7 @@ const struct amdgpu_ip_block_version gmc_v8_0_ip_block =
.funcs = &gmc_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gmc_v8_1_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v8_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 8,
.minor = 1,
@@ -1762,8 +1775,7 @@ const struct amdgpu_ip_block_version gmc_v8_1_ip_block =
.funcs = &gmc_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gmc_v8_5_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v8_5_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 8,
.minor = 5,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3686e777c76c..8ad7519f7b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -49,9 +49,16 @@
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
+#include "gfxhub_v1_2.h"
#include "mmhub_v9_4.h"
+#include "mmhub_v1_7.h"
+#include "mmhub_v1_8.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"
+#include "umc_v6_7.h"
+#include "umc_v12_0.h"
+#include "hdp_v4_0.h"
+#include "mca_v3_0.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
@@ -68,8 +75,10 @@
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
-static const char *gfxhub_client_ids[] = {
+static const char * const gfxhub_client_ids[] = {
"CB",
"DB",
"IA",
@@ -279,14 +288,53 @@ static const char *mmhub_client_ids_arcturus[][2] = {
[384][1] = "OSS",
};
-static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
-{
+static const char *mmhub_client_ids_aldebaran[][2] = {
+ [2][0] = "MP1",
+ [3][0] = "MP0",
+ [32+1][0] = "DBGU_IO0",
+ [32+2][0] = "DBGU_IO2",
+ [32+4][0] = "MPIO",
+ [96+11][0] = "JPEG0",
+ [96+12][0] = "VCN0",
+ [96+13][0] = "VCNU0",
+ [128+11][0] = "JPEG1",
+ [128+12][0] = "VCN1",
+ [128+13][0] = "VCNU1",
+ [160+1][0] = "XDP",
+ [160+14][0] = "HDP",
+ [256+0][0] = "SDMA0",
+ [256+1][0] = "SDMA1",
+ [256+2][0] = "SDMA2",
+ [256+3][0] = "SDMA3",
+ [256+4][0] = "SDMA4",
+ [384+0][0] = "OSS",
+ [2][1] = "MP1",
+ [3][1] = "MP0",
+ [32+1][1] = "DBGU_IO0",
+ [32+2][1] = "DBGU_IO2",
+ [32+4][1] = "MPIO",
+ [96+11][1] = "JPEG0",
+ [96+12][1] = "VCN0",
+ [96+13][1] = "VCNU0",
+ [128+11][1] = "JPEG1",
+ [128+12][1] = "VCN1",
+ [128+13][1] = "VCNU1",
+ [160+1][1] = "XDP",
+ [160+14][1] = "HDP",
+ [256+0][1] = "SDMA0",
+ [256+1][1] = "SDMA1",
+ [256+2][1] = "SDMA2",
+ [256+3][1] = "SDMA3",
+ [256+4][1] = "SDMA4",
+ [384+0][1] = "OSS",
+};
+
+static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = {
SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};
-static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
-{
+static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = {
SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};
@@ -363,13 +411,14 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
u32 bits, i, tmp, reg;
/* Devices newer then VEGA10/12 shall have these programming
- sequences performed by PSP BL */
+ * sequences performed by PSP BL
+ */
if (adev->asic_type >= CHIP_VEGA20)
return 0;
@@ -413,7 +462,7 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
struct amdgpu_vmhub *hub;
@@ -429,24 +478,58 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- for (j = 0; j < adev->num_vmhubs; j++) {
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
- tmp = RREG32(reg);
+
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_XCC(reg, j);
+
tmp &= ~bits;
- WREG32(reg, tmp);
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_XCC(reg, tmp, j);
}
}
break;
case AMDGPU_IRQ_STATE_ENABLE:
- for (j = 0; j < adev->num_vmhubs; j++) {
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
- tmp = RREG32(reg);
+
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_XCC(reg, j);
+
tmp |= bits;
- WREG32(reg, tmp);
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_XCC(reg, tmp, j);
}
}
break;
@@ -461,67 +544,110 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- bool retry_fault = !!(entry->src_data[1] & 0x80);
- uint32_t status = 0, cid = 0, rw = 0;
- struct amdgpu_task_info task_info;
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+ uint32_t status = 0, cid = 0, rw = 0, fed = 0;
+ struct amdgpu_task_info *task_info;
struct amdgpu_vmhub *hub;
const char *mmhub_cid;
const char *hub_name;
+ unsigned int vmhub;
u64 addr;
+ uint32_t cam_index = 0;
+ int ret, xcc_id = 0;
+ uint32_t node_id;
+
+ node_id = entry->node_id;
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+ if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
+ hub_name = "mmhub0";
+ vmhub = AMDGPU_MMHUB0(node_id / 4);
+ } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
+ hub_name = "mmhub1";
+ vmhub = AMDGPU_MMHUB1(0);
+ } else {
+ hub_name = "gfxhub0";
+ if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+ xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
+ node_id);
+ if (xcc_id < 0)
+ xcc_id = 0;
+ }
+ vmhub = xcc_id;
+ }
+ hub = &adev->vmhub[vmhub];
+
if (retry_fault) {
- /* Returning 1 here also prevents sending the IV to the KFD */
+ if (adev->irq.retry_cam_enabled) {
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ cam_index = entry->src_data[2] & 0x3ff;
- /* Process it onyl if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+ ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault);
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ if (ret)
+ return 1;
+ } else {
+ /* Process it onyl if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
entry->timestamp))
- return 1;
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (in_interrupt()) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault))
+ return 1;
}
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
- return 1;
}
+ if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+ return 1;
+
if (!printk_ratelimit())
return 0;
- if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
- hub_name = "mmhub0";
- hub = &adev->vmhub[AMDGPU_MMHUB_0];
- } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
- hub_name = "mmhub1";
- hub = &adev->vmhub[AMDGPU_MMHUB_1];
- } else {
- hub_name = "gfxhub0";
- hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ dev_err(adev->dev,
+ "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name,
+ retry_fault ? "retry" : "no-retry",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
}
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+ dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
+ addr, entry->client_id,
+ soc15_ih_clientid_name[entry->client_id]);
- dev_err(adev->dev,
- "[%s] %s page fault (src_id:%u ring:%u vmid:%u "
- "pasid:%u, for process %s pid %d thread %s pid %d)\n",
- hub_name, retry_fault ? "retry" : "no-retry",
- entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
- dev_err(adev->dev, " in page starting at address 0x%012llx from client %d\n",
- addr, entry->client_id);
+ if (amdgpu_is_multi_aid(adev))
+ dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n",
+ node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
+ node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
if (amdgpu_sriov_vf(adev))
return 0;
@@ -531,43 +657,65 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
* be updated to avoid reading an incorrect value due to
* the new fast GRBM interface.
*/
- if (entry->vmid_src == AMDGPU_GFXHUB_0)
+ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
- WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+
+ /* for fed error, kfd will handle it, return directly */
+ if (fed && amdgpu_ras_is_poison_mode_supported(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)))
+ return 0;
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (!status)
+ return 0;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
dev_err(adev->dev,
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
+ if (entry->vmid_src == AMDGPU_GFXHUB(0)) {
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
gfxhub_client_ids[cid],
cid);
} else {
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
mmhub_cid = mmhub_client_ids_vega10[cid][rw];
break;
- case CHIP_VEGA12:
+ case IP_VERSION(9, 3, 0):
mmhub_cid = mmhub_client_ids_vega12[cid][rw];
break;
- case CHIP_VEGA20:
+ case IP_VERSION(9, 4, 0):
mmhub_cid = mmhub_client_ids_vega20[cid][rw];
break;
- case CHIP_ARCTURUS:
+ case IP_VERSION(9, 4, 1):
mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
break;
- case CHIP_RAVEN:
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
mmhub_cid = mmhub_client_ids_raven[cid][rw];
break;
- case CHIP_RENOIR:
+ case IP_VERSION(1, 5, 0):
+ case IP_VERSION(2, 4, 0):
mmhub_cid = mmhub_client_ids_renoir[cid][rw];
break;
+ case IP_VERSION(1, 8, 0):
+ case IP_VERSION(9, 4, 2):
+ mmhub_cid = mmhub_client_ids_aldebaran[cid][rw];
+ break;
default:
mmhub_cid = NULL;
break;
@@ -607,7 +755,9 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.num_types = 1;
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !adev->gmc.is_app_apu) {
adev->gmc.ecc_irq.num_types = 1;
adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
}
@@ -642,8 +792,12 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
- return ((vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) &&
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_is_multi_aid(adev))
+ return false;
+
+ return ((vmhub == AMDGPU_MMHUB0(0) ||
+ vmhub == AMDGPU_MMHUB1(0)) &&
(!amdgpu_sriov_vf(adev)) &&
(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
(adev->apu_flags & AMD_APU_IS_PICASSO))));
@@ -682,43 +836,37 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
- const unsigned eng = 17;
- u32 j, inv_req, inv_req2, tmp;
+ u32 j, inv_req, tmp, sem, req, ack, inst;
+ const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
- BUG_ON(vmhub >= adev->num_vmhubs);
+ BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
hub = &adev->vmhub[vmhub];
- if (adev->gmc.xgmi.num_physical_nodes &&
- adev->asic_type == CHIP_VEGA20) {
- /* Vega20+XGMI caches PTEs in TC and TLB. Add a
- * heavy-weight TLB flush (type 2), which flushes
- * both. Due to a race condition with concurrent
- * memory accesses using the same TLB cache line, we
- * still need a second TLB flush after this.
- */
- inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
- inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
- } else {
- inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
- inv_req2 = 0;
- }
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ inst = 0;
+ else
+ inst = vmhub;
- /* This is necessary for a HW workaround under SRIOV as well
- * as GFXOFF under bare metal
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
*/
- if (adev->gfx.kiq.ring.sched.ready &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- down_read_trylock(&adev->reset_sem)) {
+ if (adev->gfx.kiq[inst].ring.sched.ready &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid);
- up_read(&adev->reset_sem);
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, inst);
return;
}
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -731,9 +879,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
- /* a read return value of 1 means semaphore acuqire */
- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng);
+ /* a read return value of 1 means semaphore acquire */
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst));
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst));
if (tmp & 0x1)
break;
udelay(1);
@@ -743,39 +893,41 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- do {
- WREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, inv_req);
-
- /*
- * Issue a dummy read to wait for the ACK register to
- * be cleared to avoid a false ACK due to the new fast
- * GRBM interface.
- */
- if (vmhub == AMDGPU_GFXHUB_0)
- RREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng);
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, GET_INST(GC, inst));
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, GET_INST(GC, inst));
- for (j = 0; j < adev->usec_timeout; j++) {
- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
- hub->eng_distance * eng);
- if (tmp & (1 << vmid))
- break;
- udelay(1);
- }
+ /*
+ * Issue a dummy read to wait for the ACK register to
+ * be cleared to avoid a false ACK due to the new fast
+ * GRBM interface.
+ */
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
+ RREG32_NO_KIQ(req);
- inv_req = inv_req2;
- inv_req2 = 0;
- } while (inv_req);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, GET_INST(GC, inst));
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, GET_INST(GC, inst));
+ if (tmp & (1 << vmid))
+ break;
+ udelay(1);
+ }
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (use_semaphore)
+ if (use_semaphore) {
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
- WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0);
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst));
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst));
+ }
spin_unlock(&adev->gmc.invalidate_lock);
@@ -792,96 +944,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* @pasid: pasid to be flush
* @flush_type: the flush type
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+static void gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
- int vmid, i;
- signed long r;
- uint32_t seq;
- uint16_t queried_pasid;
- bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
- if (amdgpu_in_reset(adev))
- return -EIO;
-
- if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
- /* Vega20+XGMI caches PTEs in TC and TLB. Add a
- * heavy-weight TLB flush (type 2), which flushes
- * both. Due to a race condition with concurrent
- * memory accesses using the same TLB cache line, we
- * still need a second TLB flush after this.
- */
- bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
- adev->asic_type == CHIP_VEGA20);
- /* 2 dwords flush + 8 dwords fence */
- unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
-
- if (vega20_xgmi_wa)
- ndw += kiq->pmf->invalidate_tlbs_size;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- /* 2 dwords flush + 8 dwords fence */
- amdgpu_ring_alloc(ring, ndw);
- if (vega20_xgmi_wa)
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, 2, all_hub);
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
- up_read(&adev->reset_sem);
- return -ETIME;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- up_read(&adev->reset_sem);
- return -ETIME;
- }
- up_read(&adev->reset_sem);
- return 0;
- }
+ uint16_t queried;
+ int i, vmid;
for (vmid = 1; vmid < 16; vmid++) {
-
- ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- if (all_hub) {
- for (i = 0; i < adev->num_vmhubs; i++)
- gmc_v9_0_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- } else {
- gmc_v9_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, flush_type);
- }
- break;
+ bool valid;
+
+ valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB(0),
+ flush_type);
}
}
-
- return 0;
-
}
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
- bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
- unsigned eng = ring->vm_inv_eng;
+ unsigned int eng = ring->vm_inv_eng;
/*
* It may lose gpuvm invalidate acknowldege state across power-gating
@@ -923,17 +1025,17 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
- unsigned pasid)
+static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
/* Do nothing because there's no lut register for mmhub1. */
- if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ if (ring->vm_hub == AMDGPU_MMHUB1(0))
return;
- if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -973,33 +1075,11 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
* 0 valid
*/
-static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-
-{
- switch (flags) {
- case AMDGPU_VM_MTYPE_DEFAULT:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_NC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_WC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
- case AMDGPU_VM_MTYPE_RW:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
- case AMDGPU_VM_MTYPE_CC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
- case AMDGPU_VM_MTYPE_UC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
- default:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- }
-}
-
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
- *addr = adev->vm_manager.vram_base_offset + *addr -
- adev->gmc.vram_start;
+ *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
BUG_ON(*addr & 0xFFFF00000000003FULL);
if (!adev->gmc.translate_further)
@@ -1011,47 +1091,263 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
*flags |= AMDGPU_PDE_BFS(0x9);
} else if (level == AMDGPU_VM_PDB0) {
- if (*flags & AMDGPU_PDE_PTE)
+ if (*flags & AMDGPU_PDE_PTE) {
*flags &= ~AMDGPU_PDE_PTE;
- else
+ if (!(*flags & AMDGPU_PTE_VALID))
+ *addr |= 1 << PAGE_SHIFT;
+ } else {
*flags |= AMDGPU_PTE_TF;
+ }
}
}
+static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *flags)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ bool is_vram = bo->tbo.resource &&
+ bo->tbo.resource->mem_type == TTM_PL_VRAM;
+ bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT);
+ bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
+ bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+ unsigned int mtype_local, mtype;
+ uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
+ bool snoop = false;
+ bool is_local;
+
+ dma_resv_assert_held(bo->tbo.base.resv);
+
+ switch (gc_ip_version) {
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ if (is_vram) {
+ if (bo_adev == adev) {
+ if (uncached)
+ mtype = MTYPE_UC;
+ else if (coherent)
+ mtype = MTYPE_CC;
+ else
+ mtype = MTYPE_RW;
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ if (gc_ip_version == IP_VERSION(9, 4, 2) &&
+ adev->gmc.xgmi.connected_to_cpu)
+ snoop = true;
+ } else {
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ snoop = true;
+ }
+ } else {
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ snoop = true;
+ }
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ /* Only local VRAM BOs or system memory on non-NUMA APUs
+ * can be assumed to be local in their entirety. Choose
+ * MTYPE_NC as safe fallback for all system memory BOs on
+ * NUMA systems. Their MTYPE can be overridden per-page in
+ * gmc_v9_0_override_vm_pte_flags.
+ */
+ mtype_local = MTYPE_RW;
+ if (amdgpu_mtype_local == 1) {
+ DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+ mtype_local = MTYPE_NC;
+ } else if (amdgpu_mtype_local == 2) {
+ DRM_INFO_ONCE("Using MTYPE_CC for local memory\n");
+ mtype_local = MTYPE_CC;
+ } else {
+ DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+ }
+ is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
+ num_possible_nodes() <= 1) ||
+ (is_vram && adev == bo_adev &&
+ KFD_XCP_MEM_ID(adev, bo->xcp_id) == vm->mem_id);
+ snoop = true;
+ if (uncached) {
+ mtype = MTYPE_UC;
+ } else if (ext_coherent) {
+ mtype = is_local ? MTYPE_CC : MTYPE_UC;
+ } else if (adev->flags & AMD_IS_APU) {
+ mtype = is_local ? mtype_local : MTYPE_NC;
+ } else {
+ /* dGPU */
+ if (is_local)
+ mtype = mtype_local;
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ }
+
+ break;
+ default:
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ if (!is_vram)
+ snoop = true;
+ }
+
+ if (mtype != MTYPE_NC)
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype);
+
+ *flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+}
+
static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_RW:
+ *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
+ break;
+ }
- if (mapping->flags & AMDGPU_PTE_PRT) {
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags &= ~AMDGPU_PTE_VALID;
}
- if (adev->asic_type == CHIP_ARCTURUS &&
- !(*flags & AMDGPU_PTE_SYSTEM) &&
- mapping->bo_va->is_xgmi)
- *flags |= AMDGPU_PTE_SNOOPED;
+ if ((*flags & AMDGPU_PTE_VALID) && bo)
+ gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags);
+}
+
+static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags)
+{
+ int local_node, nid;
+
+ /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
+ * memory can use more efficient MTYPEs.
+ */
+ if (!(adev->flags & AMD_IS_APU) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3))
+ return;
+
+ /* Only direct-mapped memory allows us to determine the NUMA node from
+ * the DMA address.
+ */
+ if (!adev->ram_is_direct_mapped) {
+ dev_dbg_ratelimited(adev->dev, "RAM is not direct mapped\n");
+ return;
+ }
+
+ /* MTYPE_NC is the same default and can be overridden.
+ * MTYPE_UC will be present if the memory is extended-coherent
+ * and can also be overridden.
+ */
+ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC) &&
+ (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC)) {
+ dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n");
+ return;
+ }
+
+ /* FIXME: Only supported on native mode for now. For carve-out, the
+ * NUMA affinity of the GPU/VM needs to come from the PCI info because
+ * memory partitions are not associated with different NUMA nodes.
+ */
+ if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
+ local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
+ } else {
+ dev_dbg_ratelimited(adev->dev, "Only native mode APU is supported.\n");
+ return;
+ }
+
+ /* Only handle real RAM. Mappings of PCIe resources don't have struct
+ * page or NUMA nodes.
+ */
+ if (!page_is_ram(addr >> PAGE_SHIFT)) {
+ dev_dbg_ratelimited(adev->dev, "Page is not RAM.\n");
+ return;
+ }
+ nid = pfn_to_nid(addr >> PAGE_SHIFT);
+ dev_dbg_ratelimited(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+ vm->mem_id, local_node, nid);
+ if (nid == local_node) {
+ uint64_t old_flags = *flags;
+ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC)) {
+ unsigned int mtype_local = MTYPE_RW;
+
+ if (amdgpu_mtype_local == 1)
+ mtype_local = MTYPE_NC;
+ else if (amdgpu_mtype_local == 2)
+ mtype_local = MTYPE_CC;
+
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local);
+ } else {
+ /* MTYPE_UC case */
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+ }
+
+ dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n",
+ old_flags, *flags);
+ }
}
-static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
+
+ /* TODO move to DC so GMC doesn't need to hard-code DCN registers */
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
u32 viewport;
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
size = (REG_GET_FIELD(viewport,
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
@@ -1059,9 +1355,14 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
4);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
+ case IP_VERSION(2, 1, 0):
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+ 4);
+ break;
default:
viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
@@ -1074,15 +1375,29 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
+static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev)
+{
+ if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested &&
+ adev->nbio.funcs->is_nps_switch_requested(adev)) {
+ adev->gmc.reset_flags |= AMDGPU_GMC_INIT_RESET_NPS;
+ return true;
+ }
+
+ return false;
+}
+
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
- .map_mtype = gmc_v9_0_map_mtype,
.get_vm_pde = gmc_v9_0_get_vm_pde,
.get_vm_pte = gmc_v9_0_get_vm_pte,
+ .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
+ .query_mem_partition_mode = &amdgpu_gmc_query_memory_partition,
+ .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition,
+ .need_reset_on_init = &gmc_v9_0_need_reset_on_init,
};
static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -1092,25 +1407,52 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
adev->umc.funcs = &umc_v6_0_funcs;
break;
- case CHIP_VEGA20:
+ case IP_VERSION(6, 1, 1):
adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+ adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
- adev->umc.funcs = &umc_v6_1_funcs;
+ adev->umc.ras = &umc_v6_1_ras;
break;
- case CHIP_ARCTURUS:
+ case IP_VERSION(6, 1, 2):
adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+ adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
- adev->umc.funcs = &umc_v6_1_funcs;
+ adev->umc.ras = &umc_v6_1_ras;
+ break;
+ case IP_VERSION(6, 7, 0):
+ adev->umc.max_ras_err_cnt_per_query =
+ UMC_V6_7_TOTAL_CHANNEL_NUM * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL;
+ adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
+ adev->umc.retire_unit = (UMC_V6_7_NA_MAP_PA_NUM * 2);
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->umc.ras = &umc_v6_7_ras;
+ if (1 & adev->smuio.funcs->get_die_id(adev))
+ adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
+ else
+ adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 5, 0):
+ adev->umc.max_ras_err_cnt_per_query =
+ UMC_V12_0_TOTAL_CHANNEL_NUM(adev) * UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
+ adev->umc.channel_inst_num = UMC_V12_0_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM;
+ adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+ adev->umc.ras = &umc_v12_0_ras;
break;
default:
break;
@@ -1119,30 +1461,161 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_ARCTURUS:
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(9, 4, 1):
adev->mmhub.funcs = &mmhub_v9_4_funcs;
break;
+ case IP_VERSION(9, 4, 2):
+ adev->mmhub.funcs = &mmhub_v1_7_funcs;
+ break;
+ case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
+ adev->mmhub.funcs = &mmhub_v1_8_funcs;
+ break;
default:
adev->mmhub.funcs = &mmhub_v1_0_funcs;
break;
}
}
+static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(9, 4, 0):
+ adev->mmhub.ras = &mmhub_v1_0_ras;
+ break;
+ case IP_VERSION(9, 4, 1):
+ adev->mmhub.ras = &mmhub_v9_4_ras;
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->mmhub.ras = &mmhub_v1_7_ras;
+ break;
+ case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
+ adev->mmhub.ras = &mmhub_v1_8_ras;
+ break;
+ default:
+ /* mmhub ras is not available */
+ break;
+ }
+}
+
static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
+ if (amdgpu_is_multi_aid(adev))
+ adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
+ else
+ adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}
-static int gmc_v9_0_early_init(void *handle)
+static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->hdp.ras = &hdp_v4_0_ras;
+}
+
+static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+
+ /* is UMC the right IP to check for MCA? Maybe DF? */
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(6, 7, 0):
+ if (!adev->gmc.xgmi.connected_to_cpu) {
+ mca->mp0.ras = &mca_v3_0_mp0_ras;
+ mca->mp1.ras = &mca_v3_0_mp1_ras;
+ mca->mpio.ras = &mca_v3_0_mpio_ras;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->gmc.xgmi.ras = &xgmi_ras;
+}
+
+static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ uint32_t supp_modes;
+ int i;
+
+ adev->gmc.supported_nps_modes = 0;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return;
+
+ mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware and supported modes available */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) {
+ while ((i = ffs(supp_modes))) {
+ if (AMDGPU_ALL_NPS_MASK & BIT(i))
+ adev->gmc.supported_nps_modes |= BIT(i);
+ supp_modes &= supp_modes - 1;
+ }
+ } else {
+ /*TODO: Check PSP version also which supports NPS switch. Otherwise keep
+ * supported modes as 0.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ adev->gmc.supported_nps_modes =
+ BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
+ * in their IP discovery tables
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_is_multi_aid(adev))
+ adev->gmc.xgmi.supported = true;
+
+ if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) {
+ adev->gmc.xgmi.supported = true;
+ adev->gmc.xgmi.connected_to_cpu =
+ adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
+ }
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
+ enum amdgpu_pkg_type pkg_type =
+ adev->smuio.funcs->get_pkg_type(adev);
+ /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present
+ * and the APU, can be in used two possible modes:
+ * - carveout mode
+ * - native APU mode
+ * "is_app_apu" can be used to identify the APU in the native
+ * mode.
+ */
+ adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU &&
+ !pci_resource_len(adev->pdev, 0));
+ }
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
gmc_v9_0_set_umc_funcs(adev);
gmc_v9_0_set_mmhub_funcs(adev);
+ gmc_v9_0_set_mmhub_ras_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
+ gmc_v9_0_set_hdp_ras_funcs(adev);
+ gmc_v9_0_set_mca_ras_funcs(adev);
+ gmc_v9_0_set_xgmi_ras_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
@@ -1150,13 +1623,14 @@ static int gmc_v9_0_early_init(void *handle)
adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -1167,15 +1641,19 @@ static int gmc_v9_0_late_init(void *handle)
* Workaround performance drop issue with VBIOS enables partial
* writes, while disables HBM ECC for vega10.
*/
- if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
- if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
- if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+ if (!amdgpu_sriov_vf(adev) &&
+ (amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(6, 0, 0))) {
+ if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
+ if (adev->df.funcs &&
+ adev->df.funcs->enable_ecc_force_par_wr_rmw)
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
}
}
- if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
- adev->mmhub.funcs->reset_ras_error_count(adev);
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP);
+ }
r = amdgpu_gmc_ras_late_init(adev);
if (r)
@@ -1187,16 +1665,20 @@ static int gmc_v9_0_late_init(void *handle)
static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
- u64 base = 0;
+ u64 base = adev->mmhub.funcs->get_fb_location(adev);
- if (!amdgpu_sriov_vf(adev))
- base = adev->mmhub.funcs->get_fb_location(adev);
+ amdgpu_gmc_set_agp_default(adev, mc);
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
- amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
- amdgpu_gmc_agp_location(adev, mc);
+ if (amdgpu_gmc_is_pdb0_enabled(adev)) {
+ amdgpu_gmc_sysvm_location(adev, mc);
+ } else {
+ amdgpu_gmc_vram_location(adev, mc, base);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
+ }
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -1219,11 +1701,17 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
int r;
/* size in MB on si */
- adev->gmc.mc_vram_size =
- adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ if (!adev->gmc.is_app_apu) {
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ } else {
+ DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
+ adev->gmc.mc_vram_size = 0;
+ }
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
- if (!(adev->flags & AMD_IS_APU)) {
+ if (!(adev->flags & AMD_IS_APU) &&
+ !adev->gmc.xgmi.connected_to_cpu) {
r = amdgpu_device_resize_fb_bar(adev);
if (r)
return r;
@@ -1232,28 +1720,49 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU) {
- adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+ /*
+ * AMD Accelerated Processing Platform (APP) supporting GPU-HOST xgmi
+ * interface can use VRAM through here as it appears system reserved
+ * memory in host address space.
+ *
+ * For APUs, VRAM is just the stolen system memory and can be accessed
+ * directly.
+ *
+ * Otherwise, use the legacy Host Data Path (HDP) through PCIe BAR.
+ */
+
+ /* check whether both host-gpu and gpu-gpu xgmi links exist */
+ if ((!amdgpu_sriov_vf(adev) &&
+ (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
+ (adev->gmc.xgmi.supported &&
+ adev->gmc.xgmi.connected_to_cpu)) {
+ adev->gmc.aper_base =
+ adev->gfxhub.funcs->get_mc_fb_offset(adev) +
+ adev->gmc.xgmi.physical_node_id *
+ adev->gmc.xgmi.node_segment_size;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
+
#endif
- /* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
- if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
- adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
- switch (adev->asic_type) {
- case CHIP_VEGA10: /* all engines support GPUVM */
- case CHIP_VEGA12: /* all engines support GPUVM */
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1): /* all engines support GPUVM */
+ case IP_VERSION(9, 2, 1): /* all engines support GPUVM */
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
default:
adev->gmc.gart_size = 512ULL << 20;
break;
- case CHIP_RAVEN: /* DCE SG support */
- case CHIP_RENOIR:
+ case IP_VERSION(9, 1, 0): /* DCE SG support */
+ case IP_VERSION(9, 2, 2): /* DCE SG support */
+ case IP_VERSION(9, 3, 0):
adev->gmc.gart_size = 1024ULL << 20;
break;
}
@@ -1261,6 +1770,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
+ adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
+
gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
return 0;
@@ -1274,14 +1785,38 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
WARN(1, "VEGA10 PCIE GART already initialized\n");
return 0;
}
+
+ if (amdgpu_gmc_is_pdb0_enabled(adev)) {
+ adev->gmc.vmid0_page_table_depth = 1;
+ adev->gmc.vmid0_page_table_block_size = 12;
+ } else {
+ adev->gmc.vmid0_page_table_depth = 0;
+ adev->gmc.vmid0_page_table_block_size = 0;
+ }
+
/* Initialize common gart structure */
r = amdgpu_gart_init(adev);
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
- adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
- return amdgpu_gart_table_vram_alloc(adev);
+
+ if (!adev->gmc.real_vram_size) {
+ dev_info(adev->dev, "Put GART in system memory for APU\n");
+ r = amdgpu_gart_table_ram_alloc(adev);
+ if (r)
+ dev_err(adev->dev, "Failed to allocate GART in system memory\n");
+ } else {
+ r = amdgpu_gart_table_vram_alloc(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_gmc_is_pdb0_enabled(adev))
+ r = amdgpu_gmc_pdb0_alloc(adev);
+ }
+
+ return r;
}
/**
@@ -1294,14 +1829,37 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
*/
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
- if (adev->asic_type == CHIP_RAVEN)
+ if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+ (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1)))
adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
-static int gmc_v9_0_sw_init(void *handle)
+static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
{
- int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
+ u32 vram_info;
+
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) &&
+ adev->rev_id == 0x3)
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
+ vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
+ adev->gmc.vram_vendor = vram_info & 0xF;
+ }
+}
+
+static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned long inst_mask = adev->aid_mask;
adev->gfxhub.funcs->init(adev);
@@ -1309,35 +1867,51 @@ static int gmc_v9_0_sw_init(void *handle)
spin_lock_init(&adev->gmc.invalidate_lock);
- r = amdgpu_atomfirmware_get_vram_info(adev,
- &vram_width, &vram_type, &vram_vendor);
- if (amdgpu_sriov_vf(adev))
- /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
- * and DF related registers is not readable, seems hardcord is the
- * only way to set the correct vram_width
- */
- adev->gmc.vram_width = 2048;
- else if (amdgpu_emu_mode != 1)
- adev->gmc.vram_width = vram_width;
-
- if (!adev->gmc.vram_width) {
- int chansize, numchan;
-
- /* hbm memory channel size */
- if (adev->flags & AMD_IS_APU)
- chansize = 64;
- else
- chansize = 128;
+ if (amdgpu_is_multi_aid(adev)) {
+ gmc_v9_4_3_init_vram_info(adev);
+ } else if (!adev->bios) {
+ if (adev->flags & AMD_IS_APU) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ adev->gmc.vram_width = 64 * 64;
+ } else {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+ }
+ } else {
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ if (amdgpu_sriov_vf(adev))
+ /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
+ * and DF related registers is not readable, seems hardcord is the
+ * only way to set the correct vram_width
+ */
+ adev->gmc.vram_width = 2048;
+ else if (amdgpu_emu_mode != 1)
+ adev->gmc.vram_width = vram_width;
+
+ if (!adev->gmc.vram_width) {
+ int chansize, numchan;
+
+ /* hbm memory channel size */
+ if (adev->flags & AMD_IS_APU)
+ chansize = 64;
+ else
+ chansize = 128;
+ if (adev->df.funcs &&
+ adev->df.funcs->get_hbm_channel_number) {
+ numchan = adev->df.funcs->get_hbm_channel_number(adev);
+ adev->gmc.vram_width = numchan * chansize;
+ }
+ }
- numchan = adev->df.funcs->get_hbm_channel_number(adev);
- adev->gmc.vram_width = numchan * chansize;
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
}
-
- adev->gmc.vram_type = vram_type;
- adev->gmc.vram_vendor = vram_vendor;
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- adev->num_vmhubs = 2;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
@@ -1348,29 +1922,44 @@ static int gmc_v9_0_sw_init(void *handle)
adev->vm_manager.num_level > 1;
}
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RENOIR:
- adev->num_vmhubs = 2;
-
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 2):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Vega10,
* block size 512 (9bit)
*/
- /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
- if (amdgpu_sriov_vf(adev))
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
- else
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
- case CHIP_ARCTURUS:
- adev->num_vmhubs = 3;
+ case IP_VERSION(9, 4, 1):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask);
/* Keep the vm size same with Vega20 */
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
+ NUM_XCC(adev->gfx.xcc_mask));
+
+ inst_mask <<= AMDGPU_MMHUB0(0);
+ bitmap_or(adev->vmhubs_mask, adev->vmhubs_mask, &inst_mask, 32);
+
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
default:
break;
@@ -1382,7 +1971,7 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
- if (adev->asic_type == CHIP_ARCTURUS) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
if (r)
@@ -1395,7 +1984,9 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !adev->gmc.is_app_apu) {
/* interrupt sent to DF. */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
&adev->gmc.ecc_irq);
@@ -1409,18 +2000,16 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ dma_addr_bits = amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(9, 4, 2) ?
+ 48 :
+ 44;
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
if (r) {
- printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
return r;
}
- adev->need_swiotlb = drm_need_swiotlb(44);
-
- if (adev->gmc.xgmi.supported) {
- r = adev->gfxhub.funcs->get_xgmi_info(adev);
- if (r)
- return r;
- }
+ adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);
r = gmc_v9_0_mc_init(adev);
if (r)
@@ -1428,6 +2017,12 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_gmc_get_vbios_allocations(adev);
+ if (amdgpu_is_multi_aid(adev)) {
+ r = amdgpu_gmc_init_mem_ranges(adev);
+ if (r)
+ return r;
+ }
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@@ -1437,6 +2032,7 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
+ gmc_v9_0_init_nps_details(adev);
/*
* number of VMs
* VMID 0 is reserved for System
@@ -1448,38 +2044,59 @@ static int gmc_v9_0_sw_init(void *handle)
* for video processing.
*/
adev->vm_manager.first_kfd_vmid =
- adev->asic_type == CHIP_ARCTURUS ? 3 : 8;
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_is_multi_aid(adev)) ?
+ 3 :
+ 8;
amdgpu_vm_manager_init(adev);
gmc_v9_0_save_registers(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_is_multi_aid(adev))
+ amdgpu_gmc_sysfs_init(adev);
+
return 0;
}
-static int gmc_v9_0_sw_fini(void *handle)
+static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_is_multi_aid(adev))
+ amdgpu_gmc_sysfs_fini(adev);
amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- amdgpu_gart_table_vram_free(adev);
+ if (!adev->gmc.real_vram_size) {
+ dev_info(adev->dev, "Put GART in system memory for APU free\n");
+ amdgpu_gart_table_ram_free(adev);
+ } else {
+ amdgpu_gart_table_vram_free(adev);
+ }
+ amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
amdgpu_bo_fini(adev);
- amdgpu_gart_fini(adev);
+
+ adev->gmc.num_mem_partitions = 0;
+ kfree(adev->gmc.mem_partitions);
return 0;
}
static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
-
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
if (amdgpu_sriov_vf(adev))
break;
fallthrough;
- case CHIP_VEGA20:
+ case IP_VERSION(9, 4, 0):
soc15_program_register_sequence(adev,
golden_settings_mmhub_1_0_0,
ARRAY_SIZE(golden_settings_mmhub_1_0_0));
@@ -1487,9 +2104,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_athub_1_0_0,
ARRAY_SIZE(golden_settings_athub_1_0_0));
break;
- case CHIP_VEGA12:
- break;
- case CHIP_RAVEN:
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
/* TODO for renoir */
soc15_program_register_sequence(adev,
golden_settings_athub_1_0_0,
@@ -1509,7 +2125,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
*/
void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
{
- if (adev->asic_type == CHIP_RAVEN) {
+ if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+ (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) {
WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
WARN_ON(adev->gmc.sdpif_register !=
RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
@@ -1525,34 +2142,53 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
int r;
+ if (amdgpu_gmc_is_pdb0_enabled(adev))
+ amdgpu_gmc_init_pdb0(adev);
+
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
- r = adev->gfxhub.funcs->gart_enable(adev);
- if (r)
- return r;
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ if (!adev->in_s0ix) {
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+ }
r = adev->mmhub.funcs->gart_enable(adev);
if (r)
return r;
- DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
- (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
- adev->gart.ready = true;
+ DRM_INFO("PCIE GART of %uM enabled.\n",
+ (unsigned int)(adev->gmc.gart_size >> 20));
+ if (adev->gmc.pdb0_bo)
+ DRM_INFO("PDB0 located at 0x%016llX\n",
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
+ DRM_INFO("PTB located at 0x%016llX\n",
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
return 0;
}
-static int gmc_v9_0_hw_init(void *handle)
+static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool value;
- int r, i;
+ int i, r;
+
+ adev->gmc.flush_pasid_uses_kiq = true;
+
+ /* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush
+ * (type 2), which flushes both. Due to a race condition with
+ * concurrent memory accesses using the same TLB cache line, we still
+ * need a second TLB flush after this.
+ */
+ adev->gmc.flush_tlb_needs_extra_type_2 =
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
+ adev->gmc.xgmi.num_physical_nodes;
/* The sequence of these two function calls matters.*/
gmc_v9_0_init_golden_registers(adev);
@@ -1570,7 +2206,7 @@ static int gmc_v9_0_hw_init(void *handle)
adev->hdp.funcs->init_registers(adev);
/* After HDP is initialized, flush HDP.*/
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
value = false;
@@ -1578,18 +2214,27 @@ static int gmc_v9_0_hw_init(void *handle)
value = true;
if (!amdgpu_sriov_vf(adev)) {
- adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
adev->mmhub.funcs->set_fault_enable_default(adev, value);
}
- for (i = 0; i < adev->num_vmhubs; ++i)
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0)))
+ continue;
gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
+ }
if (adev->umc.funcs && adev->umc.funcs->init_registers)
adev->umc.funcs->init_registers(adev);
r = gmc_v9_0_gart_enable(adev);
+ if (r)
+ return r;
- return r;
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+
+ return 0;
}
/**
@@ -1601,14 +2246,16 @@ static int gmc_v9_0_hw_init(void *handle)
*/
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
- adev->gfxhub.funcs->gart_disable(adev);
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->gart_disable(adev);
adev->mmhub.funcs->gart_disable(adev);
- amdgpu_gart_table_vram_unpin(adev);
}
-static int gmc_v9_0_hw_fini(void *handle)
+static int gmc_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ gmc_v9_0_gart_disable(adev);
if (amdgpu_sriov_vf(adev)) {
/* full access mode, so don't touch any GMC register */
@@ -1616,56 +2263,78 @@ static int gmc_v9_0_hw_fini(void *handle)
return 0;
}
- amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
- amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
- gmc_v9_0_gart_disable(adev);
+ /*
+ * Pair the operations did in gmc_v9_0_hw_init and thus maintain
+ * a correct cached state for GMC. Otherwise, the "gate" again
+ * operation on S3 resuming will fail due to wrong cached state.
+ */
+ if (adev->mmhub.funcs->update_power_gating)
+ adev->mmhub.funcs->update_power_gating(adev, false);
+
+ /*
+ * For minimal init, late_init is not called, hence VM fault/RAS irqs
+ * are not enabled.
+ */
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+ }
return 0;
}
-static int gmc_v9_0_suspend(void *handle)
+static int gmc_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gmc_v9_0_hw_fini(adev);
+ return gmc_v9_0_hw_fini(ip_block);
}
-static int gmc_v9_0_resume(void *handle)
+static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v9_0_hw_init(adev);
+ /* If a reset is done for NPS mode switch, read the memory range
+ * information again.
+ */
+ if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) {
+ amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+ adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS;
+ }
+
+ r = gmc_v9_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v9_0_is_idle(void *handle)
+static bool gmc_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v9.*/
return true;
}
-static int gmc_v9_0_wait_for_idle(void *handle)
+static int gmc_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v9.*/
return 0;
}
-static int gmc_v9_0_soft_reset(void *handle)
+static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX for emulation.*/
return 0;
}
-static int gmc_v9_0_set_clockgating_state(void *handle,
+static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->set_clockgating(adev, state);
@@ -1674,16 +2343,16 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v1_0_get_clockgating(adev, flags);
}
-static int gmc_v9_0_set_powergating_state(void *handle,
+static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1707,8 +2376,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};
-const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v9_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 9,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index e46621fed5b9..e6c0d86d3486 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v4_0.h"
#include "amdgpu_ras.h"
@@ -37,31 +36,48 @@
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L
#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0
-static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (!ring || !ring->funcs->emit_wreg)
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5))
+ return;
+
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
- else
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
+ } else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+ }
}
+static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
+ return;
+
+ /* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
+ err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+};
+
static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
- /*read back hdp ras counter to reset it to 0 */
- RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) >= IP_VERSION(4, 4, 0))
+ WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
+ else
+ /*read back hdp ras counter to reset it to 0 */
+ RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
}
static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
@@ -69,9 +85,10 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
- if (adev->asic_type == CHIP_VEGA10 ||
- adev->asic_type == CHIP_VEGA12 ||
- adev->asic_type == CHIP_RAVEN) {
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 0, 0) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 0, 1) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 1, 1) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 1, 0)) {
def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
@@ -101,10 +118,16 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
}
static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ /* Default enabled */
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+ return;
+ }
/* AMD_CG_SUPPORT_HDP_LS */
data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
@@ -113,24 +136,41 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_ARCTURUS:
+ switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) {
+ case IP_VERSION(4, 2, 1):
WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
break;
default:
break;
}
+ /* Do not program registers if VF */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0))
+ WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2);
+
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
}
+struct amdgpu_ras_block_hw_ops hdp_v4_0_ras_hw_ops = {
+ .query_ras_error_count = hdp_v4_0_query_ras_error_count,
+ .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
+};
+
+struct amdgpu_hdp_ras hdp_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &hdp_v4_0_ras_hw_ops,
+ },
+};
+
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
- .flush_hdp = hdp_v4_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
- .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
.update_clock_gating = hdp_v4_0_update_clock_gating,
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
.init_registers = hdp_v4_0_init_registers,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
index d1e6399e8c46..c44eee9282ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
@@ -27,5 +27,6 @@
#include "soc15_common.h"
extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
+extern struct amdgpu_hdp_ras hdp_v4_0_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
index 7a15e669b68d..8bc001dc9f63 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -21,27 +21,18 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v5_0.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
-static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
} else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
@@ -90,45 +81,56 @@ static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev,
RC_MEM_POWER_SD_EN, 0);
WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
- /* only one clock gating mode (LS/DS/SD) can be enabled */
- if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- IPH_MEM_POWER_LS_EN, enable);
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- RC_MEM_POWER_LS_EN, enable);
- } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- IPH_MEM_POWER_DS_EN, enable);
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- RC_MEM_POWER_DS_EN, enable);
- } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- IPH_MEM_POWER_SD_EN, enable);
- /* RC should not use shut down mode, fallback to ds */
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- RC_MEM_POWER_DS_EN, enable);
- }
-
- /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
- * be set for SRAM LS/DS/SD */
- if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
- AMD_CG_SUPPORT_HDP_SD)) {
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
- IPH_MEM_POWER_CTRL_EN, 1);
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
- RC_MEM_POWER_CTRL_EN, 1);
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_SD_EN, 1);
+ /* RC should not use shut down mode, fallback to ds or ls if allowed */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS)
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ }
+
+ /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+ * be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
}
- WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
-
- /* restore IPH & RC clock override after clock/power mode changing */
- WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1);
+ /* disable IPH & RC clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ IPH_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
}
static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -170,7 +172,7 @@ static void hdp_v5_0_update_clock_gating(struct amdgpu_device *adev,
}
static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
uint32_t tmp;
@@ -204,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_hdp_funcs hdp_v5_0_funcs = {
- .flush_hdp = hdp_v5_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.invalidate_hdp = hdp_v5_0_invalidate_hdp,
.update_clock_gating = hdp_v5_0_update_clock_gating,
.get_clock_gating_state = hdp_v5_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
new file mode 100644
index 000000000000..40940b4ab400
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "hdp_v5_2.h"
+
+#include "hdp/hdp_5_2_1_offset.h"
+#include "hdp/hdp_5_2_1_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+ 0);
+ if (amdgpu_sriov_vf(adev)) {
+ /* this is fine because SR_IOV doesn't remap the register */
+ RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
+ /* We just need to read back a register to post the write.
+ * Reading back the remapped register causes problems on
+ * some platforms so just read back the memory size register.
+ */
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ }
+ } else {
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+ 0);
+ }
+}
+
+static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch, forced on MEM clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable MEM clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+
+ if (enable) {
+ hdp_clk_cntl &=
+ ~(uint32_t)
+ (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK);
+ } else {
+ hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+ }
+
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK)))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ hdp_v5_2_update_mem_power_gating(adev, enable);
+ hdp_v5_2_update_medium_grain_clock_gating(adev, enable);
+}
+
+const struct amdgpu_hdp_funcs hdp_v5_2_funcs = {
+ .flush_hdp = hdp_v5_2_flush_hdp,
+ .update_clock_gating = hdp_v5_2_update_clock_gating,
+ .get_clock_gating_state = hdp_v5_2_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h
new file mode 100644
index 000000000000..cb2abc0c80ee
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V5_2_H__
+#define __HDP_V5_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v5_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
new file mode 100644
index 000000000000..ec20daf4272c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "hdp_v6_0.h"
+
+#include "hdp/hdp_6_0_0_offset.h"
+#include "hdp/hdp_6_0_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define regHDP_CLK_CNTL_V6_1 0xd5
+#define regHDP_CLK_CNTL_V6_1_BASE_IDX 0
+
+static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1);
+ else
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch,
+ * forced on IPH & RC clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1, hdp_clk_cntl);
+ else
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+ * be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable IPH & RC clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1, hdp_clk_cntl);
+ else
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+const struct amdgpu_hdp_funcs hdp_v6_0_funcs = {
+ .flush_hdp = amdgpu_hdp_generic_flush,
+ .update_clock_gating = hdp_v6_0_update_clock_gating,
+ .get_clock_gating_state = hdp_v6_0_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h
new file mode 100644
index 000000000000..533ecd8c0800
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V6_0_H__
+#define __HDP_V6_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v6_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
new file mode 100644
index 000000000000..ed1debc03507
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "hdp_v7_0.h"
+
+#include "hdp/hdp_7_0_0_offset.h"
+#include "hdp/hdp_7_0_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl, hdp_clk_cntl1;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0,regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch,
+ * forced on IPH & RC clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+ * be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable IPH & RC clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+const struct amdgpu_hdp_funcs hdp_v7_0_funcs = {
+ .flush_hdp = amdgpu_hdp_generic_flush,
+ .update_clock_gating = hdp_v7_0_update_clock_gating,
+ .get_clock_gating_state = hdp_v7_0_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h
new file mode 100644
index 000000000000..25b69201402d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V7_0_H__
+#define __HDP_V7_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v7_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index cc957471f31e..01cadf898c00 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -157,6 +157,9 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
iceland_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -194,6 +197,9 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -215,6 +221,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
out:
return (wptr & ih->ptr_mask);
@@ -268,9 +279,9 @@ static void iceland_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int iceland_ih_early_init(void *handle)
+static int iceland_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -282,69 +293,61 @@ static int iceland_ih_early_init(void *handle)
return 0;
}
-static int iceland_ih_sw_init(void *handle)
+static int iceland_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
}
-static int iceland_ih_sw_fini(void *handle)
+static int iceland_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
return 0;
}
-static int iceland_ih_hw_init(void *handle)
+static int iceland_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = iceland_ih_irq_init(adev);
- if (r)
- return r;
+ struct amdgpu_device *adev = ip_block->adev;
- return 0;
+ return iceland_ih_irq_init(adev);
}
-static int iceland_ih_hw_fini(void *handle)
+static int iceland_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- iceland_ih_irq_disable(adev);
+ iceland_ih_irq_disable(ip_block->adev);
return 0;
}
-static int iceland_ih_suspend(void *handle)
+static int iceland_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return iceland_ih_hw_fini(adev);
+ return iceland_ih_hw_fini(ip_block);
}
-static int iceland_ih_resume(void *handle)
+static int iceland_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return iceland_ih_hw_init(adev);
+ return iceland_ih_hw_init(ip_block);
}
-static bool iceland_ih_is_idle(void *handle)
+static bool iceland_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -353,11 +356,11 @@ static bool iceland_ih_is_idle(void *handle)
return true;
}
-static int iceland_ih_wait_for_idle(void *handle)
+static int iceland_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -369,10 +372,10 @@ static int iceland_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int iceland_ih_soft_reset(void *handle)
+static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -399,13 +402,13 @@ static int iceland_ih_soft_reset(void *handle)
return 0;
}
-static int iceland_ih_set_clockgating_state(void *handle,
+static int iceland_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int iceland_ih_set_powergating_state(void *handle,
+static int iceland_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -414,7 +417,6 @@ static int iceland_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs iceland_ih_ip_funcs = {
.name = "iceland_ih",
.early_init = iceland_ih_early_init,
- .late_init = NULL,
.sw_init = iceland_ih_sw_init,
.sw_fini = iceland_ih_sw_fini,
.hw_init = iceland_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
new file mode 100644
index 000000000000..333e9c30c091
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_6_0_0_offset.h"
+#include "oss/osssys_6_0_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v6_0.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v6_0_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (IH_V6_0).
+ */
+static void ih_v6_0_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ /* ih ring 2 is removed
+ * ih ring and ih ring 1 are available */
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
+}
+
+/**
+ * ih_v6_0_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V6_0)
+ */
+static int ih_v6_0_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+
+ if (enable) {
+ /* Unset the CLEAR_OVERFLOW bit to make sure the next step
+ * is switching the bit from 0 to 1
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Clear RB_OVERFLOW bit */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ }
+
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (IH_V6_0).
+ */
+static int ih_v6_0_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = ih_v6_0_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t ih_v6_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 2 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t ih_v6_0_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * ih_v6_0_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (IH_V6_0)
+ */
+static int ih_v6_0_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v6_0_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1) {
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v6_0_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it.
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v6_0_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = ih_v6_0_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = ih_v6_0_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ ih[i]->overflow = false;
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by deafult. The delay is
+ * used to avoid ISR being called too frequently
+ * when page fault happens on several continuous page
+ * and thus avoid MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v6_0_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw.
+ */
+static void ih_v6_0_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ ih_v6_0_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * ih_v6_0_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer. Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ if (!amdgpu_sriov_vf(adev))
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ else
+ ih->overflow = true;
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v6_0_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ */
+static void ih_v6_0_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Rearm IRQ / re-write doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * ih_v6_0_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void ih_v6_0_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ ih_v6_0_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * ih_v6_0_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int ih_v6_0_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t wptr = cpu_to_le32(entry->src_data[0]);
+
+ switch (entry->ring_id) {
+ case 1:
+ *adev->irq.ih1.wptr_cpu = wptr;
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs ih_v6_0_self_irq_funcs = {
+ .process = ih_v6_0_self_irq,
+};
+
+static void ih_v6_0_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &ih_v6_0_self_irq_funcs;
+}
+
+static int ih_v6_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ ih_v6_0_set_interrupt_funcs(adev);
+ ih_v6_0_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int ih_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_checken is programmed to allow
+ * use bus address for ih ring by psp bl */
+ use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
+
+ /* initialize ih control register offset */
+ ih_v6_0_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int ih_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int ih_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = ih_v6_0_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int ih_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ ih_v6_0_irq_disable(ip_block->adev);
+
+ return 0;
+}
+
+static int ih_v6_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v6_0_hw_fini(ip_block);
+}
+
+static int ih_v6_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v6_0_hw_init(ip_block);
+}
+
+static bool ih_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return true;
+}
+
+static int ih_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int ih_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return 0;
+}
+
+static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
+ }
+}
+
+static int ih_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ ih_v6_0_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t ih_mem_pwr_cntl;
+
+ /* Disable ih sram power cntl before switch powergating mode */
+ ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+
+ /* It is recommended to set mem powergating mode to DS mode */
+ if (enable) {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ } else {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl*/
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+}
+
+static int ih_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
+ ih_v6_0_update_ih_mem_power_gating(adev, enable);
+
+ return 0;
+}
+
+static void ih_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
+ *flags |= AMD_CG_SUPPORT_IH_CG;
+}
+
+static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
+ .name = "ih_v6_0",
+ .early_init = ih_v6_0_early_init,
+ .sw_init = ih_v6_0_sw_init,
+ .sw_fini = ih_v6_0_sw_fini,
+ .hw_init = ih_v6_0_hw_init,
+ .hw_fini = ih_v6_0_hw_fini,
+ .suspend = ih_v6_0_suspend,
+ .resume = ih_v6_0_resume,
+ .is_idle = ih_v6_0_is_idle,
+ .wait_for_idle = ih_v6_0_wait_for_idle,
+ .soft_reset = ih_v6_0_soft_reset,
+ .set_clockgating_state = ih_v6_0_set_clockgating_state,
+ .set_powergating_state = ih_v6_0_set_powergating_state,
+ .get_clockgating_state = ih_v6_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
+ .get_wptr = ih_v6_0_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = ih_v6_0_set_rptr
+};
+
+static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &ih_v6_0_funcs;
+}
+
+const struct amdgpu_ip_block_version ih_v6_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &ih_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h
new file mode 100644
index 000000000000..f27b55580716
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V6_0_IH_H__
+#define __IH_V6_0_IH_H__
+
+extern const struct amdgpu_ip_block_version ih_v6_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
new file mode 100644
index 000000000000..95b3f4e55ec3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -0,0 +1,796 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_6_1_0_offset.h"
+#include "oss/osssys_6_1_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v6_1.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v6_1_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (IH_V6_0).
+ */
+static void ih_v6_1_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ /* ih ring 2 is removed
+ * ih ring and ih ring 1 are available */
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
+}
+
+/**
+ * ih_v6_1_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V6_0)
+ */
+static int ih_v6_1_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (IH_V6_0).
+ */
+static int ih_v6_1_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = ih_v6_1_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t ih_v6_1_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 2 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t ih_v6_1_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * ih_v6_1_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (IH_V6_0)
+ */
+static int ih_v6_1_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v6_1_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1) {
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v6_1_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it.
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v6_1_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = ih_v6_1_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = ih_v6_1_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by deafult. The delay is
+ * used to avoid ISR being called too frequently
+ * when page fault happens on several continuous page
+ * and thus avoid MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v6_1_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw.
+ */
+static void ih_v6_1_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ ih_v6_1_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * ih_v6_1_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer. Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v6_1_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ */
+static void ih_v6_1_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Rearm IRQ / re-write doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * ih_v6_1_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void ih_v6_1_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ ih_v6_1_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * ih_v6_1_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int ih_v6_1_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t wptr = cpu_to_le32(entry->src_data[0]);
+
+ switch (entry->ring_id) {
+ case 1:
+ *adev->irq.ih1.wptr_cpu = wptr;
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs ih_v6_1_self_irq_funcs = {
+ .process = ih_v6_1_self_irq,
+};
+
+static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &ih_v6_1_self_irq_funcs;
+}
+
+static int ih_v6_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ ret = amdgpu_irq_add_domain(adev);
+ if (ret) {
+ return ret;
+ }
+
+ ih_v6_1_set_interrupt_funcs(adev);
+ ih_v6_1_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int ih_v6_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_checken is programmed to allow
+ * use bus address for ih ring by psp bl */
+ use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
+
+ /* initialize ih control register offset */
+ ih_v6_1_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int ih_v6_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int ih_v6_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = ih_v6_1_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int ih_v6_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ ih_v6_1_irq_disable(ip_block->adev);
+
+ return 0;
+}
+
+static int ih_v6_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v6_1_hw_fini(ip_block);
+}
+
+static int ih_v6_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v6_1_hw_init(ip_block);
+}
+
+static bool ih_v6_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return true;
+}
+
+static int ih_v6_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int ih_v6_1_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return 0;
+}
+
+static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
+ }
+
+ return;
+}
+
+static int ih_v6_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ ih_v6_1_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t ih_mem_pwr_cntl;
+
+ /* Disable ih sram power cntl before switch powergating mode */
+ ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+
+ /* It is recommended to set mem powergating mode to DS mode */
+ if (enable) {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ } else {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl*/
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+}
+
+static int ih_v6_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
+ ih_v6_1_update_ih_mem_power_gating(adev, enable);
+
+ return 0;
+}
+
+static void ih_v6_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
+ *flags |= AMD_CG_SUPPORT_IH_CG;
+
+ return;
+}
+
+static const struct amd_ip_funcs ih_v6_1_ip_funcs = {
+ .name = "ih_v6_1",
+ .early_init = ih_v6_1_early_init,
+ .sw_init = ih_v6_1_sw_init,
+ .sw_fini = ih_v6_1_sw_fini,
+ .hw_init = ih_v6_1_hw_init,
+ .hw_fini = ih_v6_1_hw_fini,
+ .suspend = ih_v6_1_suspend,
+ .resume = ih_v6_1_resume,
+ .is_idle = ih_v6_1_is_idle,
+ .wait_for_idle = ih_v6_1_wait_for_idle,
+ .soft_reset = ih_v6_1_soft_reset,
+ .set_clockgating_state = ih_v6_1_set_clockgating_state,
+ .set_powergating_state = ih_v6_1_set_powergating_state,
+ .get_clockgating_state = ih_v6_1_get_clockgating_state,
+};
+
+static const struct amdgpu_ih_funcs ih_v6_1_funcs = {
+ .get_wptr = ih_v6_1_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = ih_v6_1_set_rptr
+};
+
+static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &ih_v6_1_funcs;
+}
+
+const struct amdgpu_ip_block_version ih_v6_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &ih_v6_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h
new file mode 100644
index 000000000000..2232bc5cbd09
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V6_1_IH_H__
+#define __IH_V6_1_IH_H__
+
+extern const struct amdgpu_ip_block_version ih_v6_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
new file mode 100644
index 000000000000..b32ea4129c61
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_7_0_0_offset.h"
+#include "oss/osssys_7_0_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v7_0.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v7_0_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (IH_V7_0).
+ */
+static void ih_v7_0_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ /* ih ring 2 is removed
+ * ih ring and ih ring 1 are available */
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
+}
+
+/**
+ * ih_v7_0_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointet
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V7_0)
+ */
+static int ih_v7_0_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * ih_v7_0_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (IH_V7_0).
+ */
+static int ih_v7_0_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = ih_v7_0_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t ih_v7_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 2 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t ih_v7_0_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * ih_v7_0_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (IH_V7_0)
+ */
+static int ih_v7_0_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v7_0_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1) {
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v7_0_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v7_0_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it.
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v7_0_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = ih_v7_0_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = ih_v7_0_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by deafult. The delay is
+ * used to avoid ISR being called too frequently
+ * when page fault happens on several continuous page
+ * and thus avoid MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v7_0_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v7_0_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw.
+ */
+static void ih_v7_0_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ ih_v7_0_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * ih_v7_0_get_wptr() - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer. Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 ih_v7_0_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v7_0_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring to match
+ *
+ */
+static void ih_v7_0_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Rearm IRQ / re-write doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * ih_v7_0_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ */
+static void ih_v7_0_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ ih_v7_0_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * ih_v7_0_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int ih_v7_0_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t wptr = cpu_to_le32(entry->src_data[0]);
+
+ switch (entry->ring_id) {
+ case 1:
+ *adev->irq.ih1.wptr_cpu = wptr;
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ default: break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs ih_v7_0_self_irq_funcs = {
+ .process = ih_v7_0_self_irq,
+};
+
+static void ih_v7_0_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &ih_v7_0_self_irq_funcs;
+}
+
+static int ih_v7_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ ih_v7_0_set_interrupt_funcs(adev);
+ ih_v7_0_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_checken is programmed to allow
+ * use bus address for ih ring by psp bl */
+ use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
+
+ /* initialize ih control register offset */
+ ih_v7_0_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int ih_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int ih_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = ih_v7_0_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int ih_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ ih_v7_0_irq_disable(ip_block->adev);
+
+ return 0;
+}
+
+static int ih_v7_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v7_0_hw_fini(ip_block);
+}
+
+static int ih_v7_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v7_0_hw_init(ip_block);
+}
+
+static bool ih_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return true;
+}
+
+static int ih_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int ih_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return 0;
+}
+
+static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
+ }
+
+ return;
+}
+
+static int ih_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ ih_v7_0_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t ih_mem_pwr_cntl;
+
+ /* Disable ih sram power cntl before switch powergating mode */
+ ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+
+ /* It is recommended to set mem powergating mode to DS mode */
+ if (enable) {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ } else {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl*/
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+}
+
+static int ih_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
+ ih_v7_0_update_ih_mem_power_gating(adev, enable);
+
+ return 0;
+}
+
+static void ih_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
+ *flags |= AMD_CG_SUPPORT_IH_CG;
+
+ return;
+}
+
+static const struct amd_ip_funcs ih_v7_0_ip_funcs = {
+ .name = "ih_v7_0",
+ .early_init = ih_v7_0_early_init,
+ .sw_init = ih_v7_0_sw_init,
+ .sw_fini = ih_v7_0_sw_fini,
+ .hw_init = ih_v7_0_hw_init,
+ .hw_fini = ih_v7_0_hw_fini,
+ .suspend = ih_v7_0_suspend,
+ .resume = ih_v7_0_resume,
+ .is_idle = ih_v7_0_is_idle,
+ .wait_for_idle = ih_v7_0_wait_for_idle,
+ .soft_reset = ih_v7_0_soft_reset,
+ .set_clockgating_state = ih_v7_0_set_clockgating_state,
+ .set_powergating_state = ih_v7_0_set_powergating_state,
+ .get_clockgating_state = ih_v7_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ih_funcs ih_v7_0_funcs = {
+ .get_wptr = ih_v7_0_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = ih_v7_0_set_rptr
+};
+
+static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &ih_v7_0_funcs;
+}
+
+const struct amdgpu_ip_block_version ih_v7_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 7,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &ih_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h
new file mode 100644
index 000000000000..af9dcbc451fd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V7_0_IH_H__
+#define __IH_V7_0_IH_H__
+
+extern const struct amdgpu_ip_block_version ih_v7_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
new file mode 100644
index 000000000000..cc626036ed9c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -0,0 +1,393 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "amdgpu_dpm.h"
+
+#include "imu_v11_0_3.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_imu.bin");
+
+static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
+{
+ char ucode_prefix[30];
+ int err;
+ const struct imu_firmware_header_v1_0 *imu_hdr;
+ struct amdgpu_firmware_info *info = NULL;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu.bin", ucode_prefix);
+ if (err)
+ goto out;
+
+ imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+ //adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
+ info->ucode_id = AMDGPU_UCODE_ID_IMU_I;
+ info->fw = adev->gfx.imu_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
+ info->ucode_id = AMDGPU_UCODE_ID_IMU_D;
+ info->fw = adev->gfx.imu_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
+ } else
+ adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
+
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "gfx11: Failed to load firmware \"%s_imu.bin\"\n",
+ ucode_prefix);
+ amdgpu_ucode_release(&adev->gfx.imu_fw);
+ }
+
+ return err;
+}
+
+static int imu_v11_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct imu_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.imu_fw)
+ return -EINVAL;
+
+ hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+ //amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->imu_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version);
+
+ fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
+ le32_to_cpu(hdr->imu_iram_ucode_size_bytes));
+ fw_size = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version);
+
+ return 0;
+}
+
+static int imu_v11_0_wait_for_reset_status(struct amdgpu_device *adev)
+{
+ int i, imu_reg_val = 0;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
+ if ((imu_reg_val & 0x1f) == 0x1f)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "init imu: IMU start timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static void imu_v11_0_setup(struct amdgpu_device *adev)
+{
+ int imu_reg_val;
+
+ //enable IMU debug mode
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
+
+ if (adev->gfx.imu.mode == DEBUG_MODE) {
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
+ imu_reg_val |= 0x1;
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
+ }
+
+ //disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
+ imu_reg_val |= 0x10007;
+ WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
+}
+
+static int imu_v11_0_start(struct amdgpu_device *adev)
+{
+ int imu_reg_val;
+
+ //Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
+ imu_reg_val &= 0xfffffffe;
+ WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
+
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_dpm_set_gfx_power_up_by_imu(adev);
+
+ return imu_v11_0_wait_for_reset_status(adev);
+}
+
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] =
+{
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS , 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS , 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10201000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000080, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000003f7, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0x0fffff01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000501, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000545, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13455431, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x13455431, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x76027602, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x76207620, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000345, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x0000003e, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000061ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
+};
+
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_2[] =
+{
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS, 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10200800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000088, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000007ef, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_DRAM_PAGE_BURST, 0x20080200, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS, 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0x00000f01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x000000e4, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x000000e4, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x01231023, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000243, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000001ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000061ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00002825, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000501, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
+};
+
+static void program_imu_rlc_ram(struct amdgpu_device *adev,
+ const struct imu_rlc_ram_golden *regs,
+ const u32 array_size)
+{
+ const struct imu_rlc_ram_golden *entry;
+ u32 reg, data;
+ int i;
+
+ for (i = 0; i < array_size; ++i) {
+ entry = &regs[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ reg |= entry->addr_mask;
+
+ data = entry->data;
+ if (entry->reg == regGCMC_VM_AGP_BASE)
+ data = 0x00ffffff;
+ else if (entry->reg == regGCMC_VM_AGP_TOP)
+ data = 0x0;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
+ data = adev->gmc.vram_start >> 24;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
+ data = adev->gmc.vram_end >> 24;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+ }
+ //Indicate the latest entry
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+}
+
+static void imu_v11_0_program_rlc_ram(struct amdgpu_device *adev)
+{
+ u32 reg_data;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ program_imu_rlc_ram(adev, imu_rlc_ram_golden_11,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11));
+ break;
+ case IP_VERSION(11, 0, 2):
+ program_imu_rlc_ram(adev, imu_rlc_ram_golden_11_0_2,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_2));
+ break;
+ case IP_VERSION(11, 0, 3):
+ imu_v11_0_3_program_rlc_ram(adev);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ //Indicate the contents of the RAM are valid
+ reg_data = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX);
+ reg_data |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK;
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, reg_data);
+}
+
+const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = {
+ .init_microcode = imu_v11_0_init_microcode,
+ .load_microcode = imu_v11_0_load_microcode,
+ .setup_imu = imu_v11_0_setup,
+ .start_imu = imu_v11_0_start,
+ .program_rlc_ram = imu_v11_0_program_rlc_ram,
+ .wait_for_reset_status = imu_v11_0_wait_for_reset_status,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h
new file mode 100644
index 000000000000..e71f96fc2f06
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V11_0_H__
+#define __IMU_V11_0_H__
+
+extern const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs;
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
new file mode 100644
index 000000000000..fc69c1a29e23
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "imu_v11_0_3.h"
+
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_3[] = {
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS, 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS, 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10200800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000088, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000007ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_DRAM_PAGE_BURST, 0x20080200, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0xffffff01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0x40000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0x42000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x44000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x46000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x48000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x4A000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCGTS_TCC_DISABLE, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_RATE_CONFIG, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_EDC_CONFIG, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000005ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000065ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000444, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x54105410, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x76323276, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000244, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x00000006, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0xe0000000),
+};
+
+static void program_rlc_ram_register_setting(struct amdgpu_device *adev,
+ const struct imu_rlc_ram_golden *regs,
+ const u32 array_size)
+{
+ const struct imu_rlc_ram_golden *entry;
+ u32 reg, data;
+ int i;
+
+ for (i = 0; i < array_size; ++i) {
+ entry = &regs[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ reg |= entry->addr_mask;
+
+ data = entry->data;
+ if (entry->reg == regGCMC_VM_AGP_BASE)
+ data = 0x00ffffff;
+ else if (entry->reg == regGCMC_VM_AGP_TOP)
+ data = 0x0;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
+ data = adev->gmc.vram_start >> 24;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
+ data = adev->gmc.vram_end >> 24;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+ }
+ //Indicate the latest entry
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+}
+
+void imu_v11_0_3_program_rlc_ram(struct amdgpu_device *adev)
+{
+ program_rlc_ram_register_setting(adev,
+ imu_rlc_ram_golden_11_0_3,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_3));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h
new file mode 100644
index 000000000000..702be568f26b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V11_0_3_H__
+#define __IMU_V11_0_3_H__
+
+void imu_v11_0_3_program_rlc_ram(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
new file mode 100644
index 000000000000..58cd87db8061
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "amdgpu_dpm.h"
+
+#include "imu_v12_0.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "mmhub/mmhub_4_1_0_offset.h"
+
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu_kicker.bin");
+
+#define TRANSFER_RAM_MASK 0x001c0000
+
+static int imu_v12_0_init_microcode(struct amdgpu_device *adev)
+{
+ char ucode_prefix[30];
+ int err;
+ const struct imu_firmware_header_v1_0 *imu_hdr;
+ struct amdgpu_firmware_info *info = NULL;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu.bin", ucode_prefix);
+ if (err)
+ goto out;
+
+ imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+ adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
+ info->ucode_id = AMDGPU_UCODE_ID_IMU_I;
+ info->fw = adev->gfx.imu_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
+ info->ucode_id = AMDGPU_UCODE_ID_IMU_D;
+ info->fw = adev->gfx.imu_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
+ }
+
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "gfx12: Failed to load firmware \"%s_imu.bin\"\n",
+ ucode_prefix);
+ amdgpu_ucode_release(&adev->gfx.imu_fw);
+ }
+
+ return err;
+}
+
+static int imu_v12_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct imu_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.imu_fw)
+ return -EINVAL;
+
+ hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->imu_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version);
+
+ fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
+ le32_to_cpu(hdr->imu_iram_ucode_size_bytes));
+ fw_size = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version);
+
+ return 0;
+}
+
+static int imu_v12_0_wait_for_reset_status(struct amdgpu_device *adev)
+{
+ u32 imu_reg_val = 0;
+ int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
+ if ((imu_reg_val & 0x1f) == 0x1f)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "init imu: IMU start timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static void imu_v12_0_setup(struct amdgpu_device *adev)
+{
+ u32 imu_reg_val;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
+
+ if (adev->gfx.imu.mode == DEBUG_MODE) {
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
+ imu_reg_val |= 0x1;
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
+
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
+ imu_reg_val |= 0x20010007;
+ WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
+
+ }
+}
+
+static int imu_v12_0_start(struct amdgpu_device *adev)
+{
+ u32 imu_reg_val;
+
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
+ imu_reg_val &= 0xfffffffe;
+ WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
+
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_dpm_set_gfx_power_up_by_imu(adev);
+
+ return imu_v12_0_wait_for_reset_status(adev);
+}
+
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_12_0_1[] = {
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1X_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13571357, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x64206420, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x2460246, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x75317531, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xc0d41183, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_CHICKEN_BITS, 0x507d1c0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_CHICKEN_BITS, 0x507d1c0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_RB_WPTR_POLL_CNTL, 0x600100, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_CREDITS, 0x3f7fff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_CREDITS, 0x3f7ebf, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE0, 0x2e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE1, 0x1a078, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE2, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE0, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE1, 0x12030, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE2, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE0, 0x19041000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE0, 0x1e080000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_PRIORITY, 0x880, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_PRIORITY, 0x8880, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ARB_FINAL, 0x17, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ARB_FINAL, 0x77, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ENABLE, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ENABLE, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x20000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0c, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xfffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_MISC, 0x0091, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_MISC, 0x0091, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x00008500, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0x00880007, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regTD_CNTL, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000100, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000101, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x08200545, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBMH_CP_PERFMON_CNTL, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCB_PERFCOUNTER0_SELECT1, 0x000fffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_DEBUG_2, 0x00020000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_CPC_DEBUG, 0x00500010, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x0000000f, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x0000600f, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x0000ffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x0003d000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x0003d7ff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, 0, 0x1c0000)
+};
+
+static void program_imu_rlc_ram_old(struct amdgpu_device *adev,
+ const struct imu_rlc_ram_golden *regs,
+ const u32 array_size)
+{
+ const struct imu_rlc_ram_golden *entry;
+ u32 reg, data;
+ int i;
+
+ for (i = 0; i < array_size; ++i) {
+ entry = &regs[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ reg |= entry->addr_mask;
+ data = entry->data;
+ if (entry->reg == regGCMC_VM_AGP_BASE)
+ data = 0x00ffffff;
+ else if (entry->reg == regGCMC_VM_AGP_TOP)
+ data = 0x0;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
+ data = adev->gmc.vram_start >> 24;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
+ data = adev->gmc.vram_end >> 24;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+ }
+}
+
+static u32 imu_v12_0_grbm_gfx_index_remap(struct amdgpu_device *adev,
+ u32 data, bool high)
+{
+ u32 val, inst_index;
+
+ inst_index = REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX);
+
+ if (high)
+ val = inst_index >> 5;
+ else
+ val = REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES) << 18 |
+ REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES) << 19 |
+ REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES) << 20 |
+ REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX) << 21 |
+ REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX) << 25 |
+ (inst_index & 0x1f);
+
+ return val;
+}
+
+static u32 imu_v12_init_gfxhub_settings(struct amdgpu_device *adev,
+ u32 reg, u32 data)
+{
+ if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_BASE))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_TOP))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_TOP);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_OFFSET))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BASE))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BOT))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_TOP))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_START);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_END);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_START))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_START);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_END))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_END);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB);
+ else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB))
+ return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB);
+ else
+ return data;
+}
+
+static void program_imu_rlc_ram(struct amdgpu_device *adev,
+ const u32 *regs,
+ const u32 array_size)
+{
+ u32 reg, data, val_h = 0, val_l = TRANSFER_RAM_MASK;
+ int i;
+
+ if (array_size % 3)
+ return;
+
+ for (i = 0; i < array_size; i += 3) {
+ reg = regs[i + 0];
+ data = regs[i + 2];
+ data = imu_v12_init_gfxhub_settings(adev, reg, data);
+ if (reg == SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX)) {
+ val_l = imu_v12_0_grbm_gfx_index_remap(adev, data, false);
+ val_h = imu_v12_0_grbm_gfx_index_remap(adev, data, true);
+ } else {
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, val_h);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg | val_l);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+ }
+ }
+}
+
+static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev)
+{
+ u32 reg_data, size = 0;
+ const u32 *data = NULL;
+ int r = -EINVAL;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (!r)
+ program_imu_rlc_ram(adev, data, (const u32)size);
+ else
+ program_imu_rlc_ram_old(adev, imu_rlc_ram_golden_12_0_1,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_12_0_1));
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ //Indicate the latest entry
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+
+ reg_data = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX);
+ reg_data |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK;
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, reg_data);
+}
+
+const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs = {
+ .init_microcode = imu_v12_0_init_microcode,
+ .load_microcode = imu_v12_0_load_microcode,
+ .setup_imu = imu_v12_0_setup,
+ .start_imu = imu_v12_0_start,
+ .program_rlc_ram = imu_v12_0_program_rlc_ram,
+ .wait_for_reset_status = imu_v12_0_wait_for_reset_status,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h
new file mode 100644
index 000000000000..a1f50cb1aeab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V12_0_H__
+#define __IMU_V12_0_H__
+
+extern const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs;
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
new file mode 100644
index 000000000000..0027a639c7e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+#include "isp_v4_1_0.h"
+
+static const unsigned int isp_4_1_0_int_srcid[MAX_ISP410_INT_SRC] = {
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT11,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT13,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT14,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT15,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16
+};
+
+static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp)
+{
+ struct amdgpu_device *adev = isp->adev;
+ int idx, int_idx, num_res, r;
+ u64 isp_base;
+
+ if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+ return -EINVAL;
+
+ isp_base = adev->rmmio_base;
+
+ isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL);
+ if (!isp->isp_cell) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev,
+ "%s: isp mfd cell alloc failed\n", __func__);
+ goto failure;
+ }
+
+ num_res = MAX_ISP410_MEM_RES + MAX_ISP410_INT_SRC;
+ isp->isp_res = kcalloc(num_res, sizeof(struct resource),
+ GFP_KERNEL);
+ if (!isp->isp_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev,
+ "%s: isp mfd res alloc failed\n", __func__);
+ goto failure;
+ }
+
+ isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL);
+ if (!isp->isp_pdata) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev,
+ "%s: isp platform data alloc failed\n", __func__);
+ goto failure;
+ }
+
+ /* initialize isp platform data */
+ isp->isp_pdata->adev = (void *)adev;
+ isp->isp_pdata->asic_type = adev->asic_type;
+ isp->isp_pdata->base_rmmio_size = adev->rmmio_size;
+
+ isp->isp_res[0].name = "isp_4_1_0_reg";
+ isp->isp_res[0].flags = IORESOURCE_MEM;
+ isp->isp_res[0].start = isp_base;
+ isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END;
+
+ isp->isp_res[1].name = "isp_4_1_phy0_reg";
+ isp->isp_res[1].flags = IORESOURCE_MEM;
+ isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET;
+ isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE;
+
+ for (idx = MAX_ISP410_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) {
+ isp->isp_res[idx].name = "isp_4_1_0_irq";
+ isp->isp_res[idx].flags = IORESOURCE_IRQ;
+ isp->isp_res[idx].start =
+ amdgpu_irq_create_mapping(adev, isp_4_1_0_int_srcid[int_idx]);
+ isp->isp_res[idx].end =
+ isp->isp_res[idx].start;
+ }
+
+ isp->isp_cell[0].name = "amd_isp_capture";
+ isp->isp_cell[0].num_resources = num_res;
+ isp->isp_cell[0].resources = &isp->isp_res[0];
+ isp->isp_cell[0].platform_data = isp->isp_pdata;
+ isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data);
+
+ /* initialize isp i2c platform data */
+ isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+ if (!isp->isp_i2c_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev,
+ "%s: isp mfd res alloc failed\n", __func__);
+ goto failure;
+ }
+
+ isp->isp_i2c_res[0].name = "isp_i2c0_reg";
+ isp->isp_i2c_res[0].flags = IORESOURCE_MEM;
+ isp->isp_i2c_res[0].start = isp_base + ISP410_I2C0_OFFSET;
+ isp->isp_i2c_res[0].end = isp_base + ISP410_I2C0_OFFSET + ISP410_I2C0_SIZE;
+
+ isp->isp_cell[1].name = "amd_isp_i2c_designware";
+ isp->isp_cell[1].num_resources = 1;
+ isp->isp_cell[1].resources = &isp->isp_i2c_res[0];
+ isp->isp_cell[1].platform_data = isp->isp_pdata;
+ isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data);
+
+ /* initialize isp gpiochip platform data */
+ isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+ if (!isp->isp_gpio_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev,
+ "%s: isp gpio res alloc failed\n", __func__);
+ goto failure;
+ }
+
+ isp->isp_gpio_res[0].name = "isp_gpio_reg";
+ isp->isp_gpio_res[0].flags = IORESOURCE_MEM;
+ isp->isp_gpio_res[0].start = isp_base + ISP410_GPIO_SENSOR_OFFSET;
+ isp->isp_gpio_res[0].end = isp_base + ISP410_GPIO_SENSOR_OFFSET +
+ ISP410_GPIO_SENSOR_SIZE;
+
+ isp->isp_cell[2].name = "amdisp-pinctrl";
+ isp->isp_cell[2].num_resources = 1;
+ isp->isp_cell[2].resources = &isp->isp_gpio_res[0];
+ isp->isp_cell[2].platform_data = isp->isp_pdata;
+ isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data);
+
+ r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3);
+ if (r) {
+ drm_err(&adev->ddev,
+ "%s: add mfd hotplug device failed\n", __func__);
+ goto failure;
+ }
+
+ return 0;
+
+failure:
+
+ kfree(isp->isp_pdata);
+ kfree(isp->isp_res);
+ kfree(isp->isp_cell);
+ kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
+
+ return r;
+}
+
+static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp)
+{
+ mfd_remove_devices(isp->parent);
+
+ kfree(isp->isp_res);
+ kfree(isp->isp_cell);
+ kfree(isp->isp_pdata);
+ kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
+
+ return 0;
+}
+
+static const struct isp_funcs isp_v4_1_0_funcs = {
+ .hw_init = isp_v4_1_0_hw_init,
+ .hw_fini = isp_v4_1_0_hw_fini,
+};
+
+void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp)
+{
+ isp->funcs = &isp_v4_1_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
new file mode 100644
index 000000000000..4d239198edd0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __ISP_V4_1_0_H__
+#define __ISP_V4_1_0_H__
+
+#include "amdgpu_isp.h"
+
+#include "ivsrcid/isp/irqsrcs_isp_4_1.h"
+
+#define MAX_ISP410_MEM_RES 2
+#define MAX_ISP410_SENSOR_RES 1
+#define MAX_ISP410_INT_SRC 8
+
+#define ISP410_PHY0_OFFSET 0x66700
+#define ISP410_PHY0_SIZE 0xD30
+
+#define ISP410_I2C0_OFFSET 0x66400
+#define ISP410_I2C0_SIZE 0x100
+
+#define ISP410_GPIO_SENSOR_OFFSET 0x6613C
+#define ISP410_GPIO_SENSOR_SIZE 0x54
+
+void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
new file mode 100644
index 000000000000..4258d3e0b706
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/gpio/machine.h>
+#include "amdgpu.h"
+#include "isp_v4_1_1.h"
+
+MODULE_FIRMWARE("amdgpu/isp_4_1_1.bin");
+
+#define ISP_PERFORMANCE_STATE_LOW 0
+#define ISP_PERFORMANCE_STATE_HIGH 1
+
+#define ISP_HIGH_PERFORMANC_XCLK 788
+#define ISP_HIGH_PERFORMANC_ICLK 788
+
+static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = {
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT11,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT13,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT14,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT15,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16
+};
+
+static struct gpiod_lookup_table isp_gpio_table = {
+ .dev_id = "amd_isp_capture",
+ .table = {
+ GPIO_LOOKUP("AMDI0030:00", 85, "enable_isp", GPIO_ACTIVE_HIGH),
+ { }
+ },
+};
+
+static struct gpiod_lookup_table isp_sensor_gpio_table = {
+ .dev_id = "i2c-ov05c10",
+ .table = {
+ GPIO_LOOKUP("amdisp-pinctrl", 0, "enable", GPIO_ACTIVE_HIGH),
+ { }
+ },
+};
+
+static int isp_poweroff(struct generic_pm_domain *genpd)
+{
+ struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd);
+ struct amdgpu_device *adev = isp->adev;
+
+ return amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ISP, true, 0);
+}
+
+static int isp_poweron(struct generic_pm_domain *genpd)
+{
+ struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd);
+ struct amdgpu_device *adev = isp->adev;
+
+ return amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ISP, false, 0);
+}
+
+static int isp_set_performance_state(struct generic_pm_domain *genpd,
+ unsigned int state)
+{
+ struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd);
+ struct amdgpu_device *adev = isp->adev;
+ u32 iclk, xclk;
+ int ret;
+
+ switch (state) {
+ case ISP_PERFORMANCE_STATE_HIGH:
+ xclk = ISP_HIGH_PERFORMANC_XCLK;
+ iclk = ISP_HIGH_PERFORMANC_ICLK;
+ break;
+ case ISP_PERFORMANCE_STATE_LOW:
+ /* isp runs at default lowest clock-rate on power-on, do nothing */
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ ret = amdgpu_dpm_set_soft_freq_range(adev, PP_ISPXCLK, xclk, 0);
+ if (ret) {
+ drm_err(&adev->ddev, "failed to set xclk %u to %u: %d\n",
+ xclk, state, ret);
+ return ret;
+ }
+
+ ret = amdgpu_dpm_set_soft_freq_range(adev, PP_ISPICLK, iclk, 0);
+ if (ret) {
+ drm_err(&adev->ddev, "failed to set iclk %u to %u: %d\n",
+ iclk, state, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int isp_genpd_add_device(struct device *dev, void *data)
+{
+ struct generic_pm_domain *gpd = data;
+ struct platform_device *pdev = container_of(dev, struct platform_device, dev);
+ struct amdgpu_isp *isp = container_of(gpd, struct amdgpu_isp, ispgpd);
+ struct amdgpu_device *adev = isp->adev;
+ int ret;
+
+ if (!pdev)
+ return -EINVAL;
+
+ if (!dev->type->name) {
+ drm_dbg(&adev->ddev, "Invalid device type to add\n");
+ goto exit;
+ }
+
+ if (strcmp(dev->type->name, "mfd_device")) {
+ drm_dbg(&adev->ddev, "Invalid isp mfd device %s to add\n", pdev->mfd_cell->name);
+ goto exit;
+ }
+
+ ret = pm_genpd_add_device(gpd, dev);
+ if (ret) {
+ drm_err(&adev->ddev, "Failed to add dev %s to genpd %d\n",
+ pdev->mfd_cell->name, ret);
+ return -ENODEV;
+ }
+
+exit:
+ /* Continue to add */
+ return 0;
+}
+
+static int isp_genpd_remove_device(struct device *dev, void *data)
+{
+ struct generic_pm_domain *gpd = data;
+ struct platform_device *pdev = container_of(dev, struct platform_device, dev);
+ struct amdgpu_isp *isp = container_of(gpd, struct amdgpu_isp, ispgpd);
+ struct amdgpu_device *adev = isp->adev;
+ int ret;
+
+ if (!pdev)
+ return -EINVAL;
+
+ if (!dev->type->name) {
+ drm_dbg(&adev->ddev, "Invalid device type to remove\n");
+ goto exit;
+ }
+
+ if (strcmp(dev->type->name, "mfd_device")) {
+ drm_dbg(&adev->ddev, "Invalid isp mfd device %s to remove\n",
+ pdev->mfd_cell->name);
+ goto exit;
+ }
+
+ ret = pm_genpd_remove_device(dev);
+ if (ret) {
+ drm_err(&adev->ddev, "Failed to remove dev from genpd %d\n", ret);
+ return -ENODEV;
+ }
+
+exit:
+ /* Continue to remove */
+ return 0;
+}
+
+static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp)
+{
+ const struct software_node *amd_camera_node, *isp4_node;
+ struct amdgpu_device *adev = isp->adev;
+ struct acpi_device *acpi_dev;
+ int idx, int_idx, num_res, r;
+ u64 isp_base;
+
+ if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+ return -EINVAL;
+
+ r = amdgpu_acpi_get_isp4_dev(&acpi_dev);
+ if (r) {
+ drm_dbg(&adev->ddev, "Invalid isp platform detected (%d)", r);
+ /* allow GPU init to progress */
+ return 0;
+ }
+
+ /* add GPIO resources required for OMNI5C10 sensor */
+ if (!strcmp("OMNI5C10", acpi_device_hid(acpi_dev))) {
+ gpiod_add_lookup_table(&isp_gpio_table);
+ gpiod_add_lookup_table(&isp_sensor_gpio_table);
+ }
+
+ isp_base = adev->rmmio_base;
+
+ isp->ispgpd.name = "ISP_v_4_1_1";
+ isp->ispgpd.power_off = isp_poweroff;
+ isp->ispgpd.power_on = isp_poweron;
+ isp->ispgpd.set_performance_state = isp_set_performance_state;
+
+ r = pm_genpd_init(&isp->ispgpd, NULL, true);
+ if (r) {
+ drm_err(&adev->ddev, "failed to initialize genpd (%d)\n", r);
+ return -EINVAL;
+ }
+
+ isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL);
+ if (!isp->isp_cell) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp mfd cell alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ num_res = MAX_ISP411_MEM_RES + MAX_ISP411_INT_SRC;
+
+ isp->isp_res = kcalloc(num_res, sizeof(struct resource),
+ GFP_KERNEL);
+ if (!isp->isp_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp mfd resource alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL);
+ if (!isp->isp_pdata) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp platform data alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ amd_camera_node = (const struct software_node *)acpi_dev->driver_data;
+ isp4_node = software_node_find_by_name(amd_camera_node, "isp4");
+
+ /* initialize isp platform data */
+ isp->isp_pdata->adev = (void *)adev;
+ isp->isp_pdata->asic_type = adev->asic_type;
+ isp->isp_pdata->base_rmmio_size = adev->rmmio_size;
+
+ isp->isp_res[0].name = "isp_4_1_1_reg";
+ isp->isp_res[0].flags = IORESOURCE_MEM;
+ isp->isp_res[0].start = isp_base;
+ isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END;
+
+ isp->isp_res[1].name = "isp_4_1_1_phy0_reg";
+ isp->isp_res[1].flags = IORESOURCE_MEM;
+ isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET;
+ isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE;
+
+ for (idx = MAX_ISP411_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) {
+ isp->isp_res[idx].name = "isp_4_1_1_irq";
+ isp->isp_res[idx].flags = IORESOURCE_IRQ;
+ isp->isp_res[idx].start =
+ amdgpu_irq_create_mapping(adev, isp_4_1_1_int_srcid[int_idx]);
+ isp->isp_res[idx].end =
+ isp->isp_res[idx].start;
+ }
+
+ isp->isp_cell[0].name = "amd_isp_capture";
+ isp->isp_cell[0].num_resources = num_res;
+ isp->isp_cell[0].resources = &isp->isp_res[0];
+ isp->isp_cell[0].platform_data = isp->isp_pdata;
+ isp->isp_cell[0].swnode = isp4_node;
+ isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data);
+
+ /* initialize isp i2c platform data */
+ isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+ if (!isp->isp_i2c_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp mfd res alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ isp->isp_i2c_res[0].name = "isp_i2c0_reg";
+ isp->isp_i2c_res[0].flags = IORESOURCE_MEM;
+ isp->isp_i2c_res[0].start = isp_base + ISP411_I2C0_OFFSET;
+ isp->isp_i2c_res[0].end = isp_base + ISP411_I2C0_OFFSET + ISP411_I2C0_SIZE;
+
+ isp->isp_cell[1].name = "amd_isp_i2c_designware";
+ isp->isp_cell[1].num_resources = 1;
+ isp->isp_cell[1].resources = &isp->isp_i2c_res[0];
+ isp->isp_cell[1].platform_data = isp->isp_pdata;
+ isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data);
+
+ /* initialize isp gpiochip platform data */
+ isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+ if (!isp->isp_gpio_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp gpio resource alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ isp->isp_gpio_res[0].name = "isp_gpio_reg";
+ isp->isp_gpio_res[0].flags = IORESOURCE_MEM;
+ isp->isp_gpio_res[0].start = isp_base + ISP411_GPIO_SENSOR_OFFSET;
+ isp->isp_gpio_res[0].end = isp_base + ISP411_GPIO_SENSOR_OFFSET +
+ ISP411_GPIO_SENSOR_SIZE;
+
+ isp->isp_cell[2].name = "amdisp-pinctrl";
+ isp->isp_cell[2].num_resources = 1;
+ isp->isp_cell[2].resources = &isp->isp_gpio_res[0];
+ isp->isp_cell[2].platform_data = isp->isp_pdata;
+ isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data);
+
+ /* add only amd_isp_capture and amd_isp_i2c_designware to genpd */
+ r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2);
+ if (r) {
+ drm_err(&adev->ddev, "add mfd hotplug device failed (%d)\n", r);
+ goto failure;
+ }
+
+ r = device_for_each_child(isp->parent, &isp->ispgpd,
+ isp_genpd_add_device);
+ if (r) {
+ drm_err(&adev->ddev, "failed to add devices to genpd (%d)\n", r);
+ goto failure;
+ }
+
+ r = mfd_add_hotplug_devices(isp->parent, &isp->isp_cell[2], 1);
+ if (r) {
+ drm_err(&adev->ddev, "add pinctl hotplug device failed (%d)\n", r);
+ goto failure;
+ }
+
+ return 0;
+
+failure:
+
+ kfree(isp->isp_pdata);
+ kfree(isp->isp_res);
+ kfree(isp->isp_cell);
+ kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
+
+ return r;
+}
+
+static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp)
+{
+ device_for_each_child(isp->parent, NULL,
+ isp_genpd_remove_device);
+
+ mfd_remove_devices(isp->parent);
+
+ kfree(isp->isp_res);
+ kfree(isp->isp_cell);
+ kfree(isp->isp_pdata);
+ kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
+
+ return 0;
+}
+
+static const struct isp_funcs isp_v4_1_1_funcs = {
+ .hw_init = isp_v4_1_1_hw_init,
+ .hw_fini = isp_v4_1_1_hw_fini,
+};
+
+void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp)
+{
+ isp->funcs = &isp_v4_1_1_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
new file mode 100644
index 000000000000..fe45d70d87f1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __ISP_V4_1_1_H__
+#define __ISP_V4_1_1_H__
+
+#include "amdgpu_isp.h"
+
+#include "ivsrcid/isp/irqsrcs_isp_4_1.h"
+
+#define MAX_ISP411_MEM_RES 2
+#define MAX_ISP411_INT_SRC 8
+
+#define ISP411_PHY0_OFFSET 0x66700
+#define ISP411_PHY0_SIZE 0xD30
+
+#define ISP411_I2C0_OFFSET 0x66400
+#define ISP411_I2C0_SIZE 0x100
+
+#define ISP411_GPIO_SENSOR_OFFSET 0x6613C
+#define ISP411_GPIO_SENSOR_SIZE 0x54
+
+void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index 7332a320ede8..b5bb7f4d607c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v1_0.h"
@@ -34,6 +35,9 @@
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
{
@@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
@@ -376,7 +383,7 @@ static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring,
static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -437,7 +444,7 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case 126:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -451,15 +458,16 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
/**
* jpeg_v1_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-int jpeg_v1_0_early_init(void *handle)
+int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v1_0_set_dec_ring_funcs(adev);
jpeg_v1_0_set_irq_funcs(adev);
@@ -470,12 +478,12 @@ int jpeg_v1_0_early_init(void *handle)
/**
* jpeg_v1_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-int jpeg_v1_0_sw_init(void *handle)
+int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -484,14 +492,15 @@ int jpeg_v1_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
- 0, AMDGPU_RING_PRIO_DEFAULT);
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch =
+ adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] =
SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
return 0;
@@ -500,15 +509,15 @@ int jpeg_v1_0_sw_init(void *handle)
/**
* jpeg_v1_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG free up sw allocation
*/
-void jpeg_v1_0_sw_fini(void *handle)
+void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec);
+ amdgpu_ring_fini(adev->jpeg.inst->ring_dec);
}
/**
@@ -521,7 +530,7 @@ void jpeg_v1_0_sw_fini(void *handle)
*/
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
if (mode == 0) {
WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
@@ -548,11 +557,11 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
.nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
- .extra_dw = 64,
+ .extra_bytes = 256,
.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
+ .parse_cs = jpeg_v1_dec_ring_parse_cs,
.emit_frame_size =
6 + 6 + /* hdp invalidate / flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
@@ -579,8 +588,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs;
}
static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
@@ -596,18 +604,84 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
int cnt = 0;
- mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround);
if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
- for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
+ for (cnt = 0; cnt < adev->vcn.inst[0].num_enc_rings; cnt++) {
if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
}
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
}
+
+/**
+ * jpeg_v1_dec_ring_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ u32 i, reg, res, cond, type;
+ int ret = 0;
+ struct amdgpu_device *adev = parser->adev;
+
+ for (i = 0; i < ib->length_dw ; i += 2) {
+ reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+ res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+ cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+ type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+ if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */
+ return -EINVAL;
+
+ if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END)
+ continue;
+
+ switch (type) {
+ case PACKETJ_TYPE0:
+ if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH &&
+ reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW &&
+ reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH &&
+ reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW &&
+ reg != JPEG_V1_REG_CTX_INDEX &&
+ reg != JPEG_V1_REG_CTX_DATA) {
+ ret = -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE1:
+ if (reg != JPEG_V1_REG_CTX_DATA)
+ ret = -EINVAL;
+ break;
+ case PACKETJ_TYPE3:
+ if (reg != JPEG_V1_REG_SOFT_RESET)
+ ret = -EINVAL;
+ break;
+ case PACKETJ_TYPE6:
+ if (ib->ptr[i] != CP_PACKETJ_NOP)
+ ret = -EINVAL;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
index bbf33a6a3972..097328635083 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -24,9 +24,20 @@
#ifndef __JPEG_V1_0_H__
#define __JPEG_V1_0_H__
-int jpeg_v1_0_early_init(void *handle);
-int jpeg_v1_0_sw_init(void *handle);
-void jpeg_v1_0_sw_fini(void *handle);
+int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block);
+int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block);
+void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block);
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
+#define JPEG_V1_REG_RANGE_START 0x8000
+#define JPEG_V1_REG_RANGE_END 0x803f
+
+#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238
+#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239
+#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a
+#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b
+#define JPEG_V1_REG_CTX_INDEX 0x8328
+#define JPEG_V1_REG_CTX_DATA 0x8329
+#define JPEG_V1_REG_SOFT_RESET 0x83a0
+
#endif /*__JPEG_V1_0_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 3b22953aa62e..27c76bd424cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -32,43 +32,40 @@
#include "vcn/vcn_2_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
-#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
-#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029
-#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a
-#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b
-#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea
-#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb
-#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf
-#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1
-#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8
-#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9
-#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
-#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec
-#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed
-#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
-#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
-#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
-#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
-
-#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v2_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v2_0_early_init(void *handle)
+static int jpeg_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v2_0_set_dec_ring_funcs(adev);
jpeg_v2_0_set_irq_funcs(adev);
@@ -79,13 +76,13 @@ static int jpeg_v2_0_early_init(void *handle)
/**
* jpeg_v2_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v2_0_sw_init(void *handle)
+static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -103,37 +100,50 @@ static int jpeg_v2_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
- 0, AMDGPU_RING_PRIO_DEFAULT);
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_0, ARRAY_SIZE(jpeg_reg_list_2_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v2_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v2_0_sw_fini(void *handle)
+static int jpeg_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -142,39 +152,36 @@ static int jpeg_v2_0_sw_fini(void *handle)
/**
* jpeg_v2_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v2_0_hw_init(void *handle)
+static int jpeg_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
- r = amdgpu_ring_test_helper(ring);
- if (!r)
- DRM_INFO("JPEG decode initialized successfully.\n");
-
- return r;
+ return amdgpu_ring_test_helper(ring);
}
/**
* jpeg_v2_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v2_0_hw_fini(void *handle)
+static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -182,20 +189,19 @@ static int jpeg_v2_0_hw_fini(void *handle)
/**
* jpeg_v2_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v2_0_suspend(void *handle)
+static int jpeg_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v2_0_hw_fini(adev);
+ r = jpeg_v2_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -203,20 +209,19 @@ static int jpeg_v2_0_suspend(void *handle)
/**
* jpeg_v2_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v2_0_resume(void *handle)
+static int jpeg_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v2_0_hw_init(adev);
+ r = jpeg_v2_0_hw_init(ip_block);
return r;
}
@@ -329,7 +334,7 @@ static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device *adev)
*/
static int jpeg_v2_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
@@ -425,7 +430,7 @@ static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
}
@@ -442,7 +447,7 @@ static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
@@ -553,13 +558,21 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT));
+
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@@ -627,7 +640,7 @@ void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -669,18 +682,18 @@ void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool jpeg_v2_0_is_idle(void *handle)
+static bool jpeg_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v2_0_wait_for_idle(void *handle)
+static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
@@ -689,14 +702,14 @@ static int jpeg_v2_0_wait_for_idle(void *handle)
return ret;
}
-static int jpeg_v2_0_set_clockgating_state(void *handle,
+static int jpeg_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
- if (!jpeg_v2_0_is_idle(handle))
+ if (!jpeg_v2_0_is_idle(ip_block))
return -EBUSY;
jpeg_v2_0_enable_clock_gating(adev);
} else {
@@ -706,10 +719,10 @@ static int jpeg_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
@@ -742,7 +755,7 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -753,10 +766,25 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v2_0_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v2_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.name = "jpeg_v2_0",
.early_init = jpeg_v2_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_0_sw_init,
.sw_fini = jpeg_v2_0_sw_fini,
.hw_init = jpeg_v2_0_hw_init,
@@ -765,28 +793,26 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.resume = jpeg_v2_0_resume,
.is_idle = jpeg_v2_0_is_idle,
.wait_for_idle = jpeg_v2_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_0_set_clockgating_state,
.set_powergating_state = jpeg_v2_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
8 + 16,
- .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+ .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */
.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
@@ -801,12 +827,12 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_0_ring_reset,
};
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v2_0_dec_ring_vm_funcs;
}
static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = {
@@ -820,8 +846,7 @@ static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev)
adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs;
}
-const struct amdgpu_ip_block_version jpeg_v2_0_ip_block =
-{
+const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
index 15a344ed340f..654e43e83e2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -24,6 +24,27 @@
#ifndef __JPEG_V2_0_H__
#define __JPEG_V2_0_H__
+#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029
+#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a
+#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb
+#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf
+#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9
+#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed
+#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index c6724a0e0c43..20983f126b49 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -26,6 +26,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
+#include "jpeg_v2_5.h"
#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
@@ -35,10 +36,27 @@
#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_5[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
+static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev);
static int amdgpu_ih_clientid_jpeg[] = {
SOC15_IH_CLIENTID_VCN,
@@ -48,16 +66,17 @@ static int amdgpu_ih_clientid_jpeg[] = {
/**
* jpeg_v2_5_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v2_5_early_init(void *handle)
+static int jpeg_v2_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 harvest;
int i;
+ adev->jpeg.num_jpeg_rings = 1;
adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS;
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING);
@@ -70,6 +89,7 @@ static int jpeg_v2_5_early_init(void *handle)
jpeg_v2_5_set_dec_ring_funcs(adev);
jpeg_v2_5_set_irq_funcs(adev);
+ jpeg_v2_5_set_ras_funcs(adev);
return 0;
}
@@ -77,15 +97,15 @@ static int jpeg_v2_5_early_init(void *handle)
/**
* jpeg_v2_5_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v2_5_sw_init(void *handle)
+static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
@@ -96,6 +116,18 @@ static int jpeg_v2_5_sw_init(void *handle)
VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
if (r)
return r;
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_6__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_6__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq);
+ if (r)
+ return r;
}
r = amdgpu_jpeg_sw_init(adev);
@@ -110,38 +142,58 @@ static int jpeg_v2_5_sw_init(void *handle)
if (adev->jpeg.harvest_config & (1 << i))
continue;
- ring = &adev->jpeg.inst[i].ring_dec;
+ ring = adev->jpeg.inst[i].ring_dec;
ring->use_doorbell = true;
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0))
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
sprintf(ring->name, "jpeg_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq,
- 0, AMDGPU_RING_PRIO_DEFAULT);
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
}
- return 0;
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_5, ARRAY_SIZE(jpeg_reg_list_2_5));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v2_5_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v2_5_sw_fini(void *handle)
+static int jpeg_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -150,12 +202,12 @@ static int jpeg_v2_5_sw_fini(void *handle)
/**
* jpeg_v2_5_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v2_5_hw_init(void *handle)
+static int jpeg_v2_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
@@ -163,7 +215,7 @@ static int jpeg_v2_5_hw_init(void *handle)
if (adev->jpeg.harvest_config & (1 << i))
continue;
- ring = &adev->jpeg.inst[i].ring_dec;
+ ring = adev->jpeg.inst[i].ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
@@ -172,34 +224,33 @@ static int jpeg_v2_5_hw_init(void *handle)
return r;
}
- DRM_INFO("JPEG decode initialized successfully.\n");
-
return 0;
}
/**
* jpeg_v2_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v2_5_hw_fini(void *handle)
+static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
- ring = &adev->jpeg.inst[i].ring_dec;
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
- jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
- ring->sched.ready = false;
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
}
return 0;
@@ -208,20 +259,19 @@ static int jpeg_v2_5_hw_fini(void *handle)
/**
* jpeg_v2_5_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v2_5_suspend(void *handle)
+static int jpeg_v2_5_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v2_5_hw_fini(adev);
+ r = jpeg_v2_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -229,20 +279,19 @@ static int jpeg_v2_5_suspend(void *handle)
/**
* jpeg_v2_5_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v2_5_resume(void *handle)
+static int jpeg_v2_5_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v2_5_hw_init(adev);
+ r = jpeg_v2_5_hw_init(ip_block);
return r;
}
@@ -289,6 +338,44 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
}
+static void jpeg_v2_5_start_inst(struct amdgpu_device *adev, int i)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[i].ring_dec;
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v2_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
+}
+
/**
* jpeg_v2_5_start - start JPEG block
*
@@ -298,52 +385,33 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
*/
static int jpeg_v2_5_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
int i;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
+ jpeg_v2_5_start_inst(adev, i);
- ring = &adev->jpeg.inst[i].ring_dec;
- /* disable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- /* JPEG disable CGC */
- jpeg_v2_5_disable_clock_gating(adev, i);
-
- /* MJPEG global tiling registers */
- WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC_MASK,
- ~JPEG_SYS_INT_EN__DJRBC_MASK);
-
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
}
return 0;
}
+static void jpeg_v2_5_stop_inst(struct amdgpu_device *adev, int i)
+{
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v2_5_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
/**
* jpeg_v2_5_stop - stop JPEG block
*
@@ -358,18 +426,7 @@ static int jpeg_v2_5_stop(struct amdgpu_device *adev)
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
-
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- jpeg_v2_5_enable_clock_gating(adev, i);
-
- /* enable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ jpeg_v2_5_stop_inst(adev, i);
}
return 0;
@@ -401,7 +458,7 @@ static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR);
}
@@ -418,16 +475,52 @@ static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
}
}
-static bool jpeg_v2_5_is_idle(void *handle)
+/**
+ * jpeg_v2_6_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void jpeg_v2_6_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x6aa04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80000000 | (1 << (ring->me * 2 + 14)));
+}
+
+/**
+ * jpeg_v2_6_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void jpeg_v2_6_dec_ring_insert_end(struct amdgpu_ring *ring)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x6aa04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (1 << (ring->me * 2 + 14)));
+}
+
+static bool jpeg_v2_5_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -442,9 +535,9 @@ static bool jpeg_v2_5_is_idle(void *handle)
return ret;
}
-static int jpeg_v2_5_wait_for_idle(void *handle)
+static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -461,10 +554,10 @@ static int jpeg_v2_5_wait_for_idle(void *handle)
return 0;
}
-static int jpeg_v2_5_set_clockgating_state(void *handle,
+static int jpeg_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -473,7 +566,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (!jpeg_v2_5_is_idle(handle))
+ if (!jpeg_v2_5_is_idle(ip_block))
return -EBUSY;
jpeg_v2_5_enable_clock_gating(adev, i);
} else {
@@ -484,13 +577,13 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
- if(state == adev->jpeg.cur_state)
+ if (state == adev->jpeg.cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
@@ -498,7 +591,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle,
else
ret = jpeg_v2_5_start(adev);
- if(!ret)
+ if (!ret)
adev->jpeg.cur_state = state;
return ret;
@@ -512,6 +605,14 @@ static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -534,7 +635,7 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -545,10 +646,19 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ jpeg_v2_5_stop_inst(ring->adev, ring->me);
+ jpeg_v2_5_start_inst(ring->adev, ring->me);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.name = "jpeg_v2_5",
.early_init = jpeg_v2_5_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_5_sw_init,
.sw_fini = jpeg_v2_5_sw_fini,
.hw_init = jpeg_v2_5_hw_init,
@@ -557,21 +667,36 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.resume = jpeg_v2_5_resume,
.is_idle = jpeg_v2_5_is_idle,
.wait_for_idle = jpeg_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
+ .name = "jpeg_v2_6",
+ .early_init = jpeg_v2_5_early_init,
+ .sw_init = jpeg_v2_5_sw_init,
+ .sw_fini = jpeg_v2_5_sw_fini,
+ .hw_init = jpeg_v2_5_hw_init,
+ .hw_fini = jpeg_v2_5_hw_fini,
+ .suspend = jpeg_v2_5_suspend,
+ .resume = jpeg_v2_5_resume,
+ .is_idle = jpeg_v2_5_is_idle,
+ .wait_for_idle = jpeg_v2_5_wait_for_idle,
+ .set_clockgating_state = jpeg_v2_5_set_clockgating_state,
+ .set_powergating_state = jpeg_v2_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_1,
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -593,6 +718,38 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_5_ring_reset,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v2_5_dec_ring_get_rptr,
+ .get_wptr = jpeg_v2_5_dec_ring_get_wptr,
+ .set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_5_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_5_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_5_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_6_dec_ring_insert_start,
+ .insert_end = jpeg_v2_6_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_5_ring_reset,
};
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -602,10 +759,11 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
-
- adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs;
- adev->jpeg.inst[i].ring_dec.me = i;
- DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i);
+ if (adev->asic_type == CHIP_ARCTURUS)
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_5_dec_ring_vm_funcs;
+ else /* CHIP_ALDEBARAN */
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_6_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec->me = i;
}
}
@@ -614,6 +772,11 @@ static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = {
.process = jpeg_v2_5_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs jpeg_v2_6_ras_irq_funcs = {
+ .set = jpeg_v2_6_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
{
int i;
@@ -624,14 +787,83 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
adev->jpeg.inst[i].irq.num_types = 1;
adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs;
+
+ adev->jpeg.inst[i].ras_poison_irq.num_types = 1;
+ adev->jpeg.inst[i].ras_poison_irq.funcs = &jpeg_v2_6_ras_irq_funcs;
}
}
-const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
-{
+const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 5,
.rev = 0,
.funcs = &jpeg_v2_5_ip_funcs,
};
+
+const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 2,
+ .minor = 6,
+ .rev = 0,
+ .funcs = &jpeg_v2_6_ip_funcs,
+};
+
+static uint32_t jpeg_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V2_6_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V2_6_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v2_6_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v2_6_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = {
+ .query_poison_status = jpeg_v2_6_query_ras_poison_status,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v2_6_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v2_6_ras_hw_ops,
+ .ras_late_init = amdgpu_jpeg_ras_late_init,
+ },
+};
+
+static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, JPEG_HWIP, 0)) {
+ case IP_VERSION(2, 6, 0):
+ adev->jpeg.ras = &jpeg_v2_6_ras;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
index 2b4087c02620..1e858c6cdf13 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
@@ -24,6 +24,14 @@
#ifndef __JPEG_V2_5_H__
#define __JPEG_V2_5_H__
+enum amdgpu_jpeg_v2_6_sub_block {
+ AMDGPU_JPEG_V2_6_JPEG0 = 0,
+ AMDGPU_JPEG_V2_6_JPEG1,
+
+ AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block;
+extern const struct amdgpu_ip_block_version jpeg_v2_6_ip_block;
#endif /* __JPEG_V2_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index e8fbb2a0de34..d1a011c40ba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -34,27 +34,53 @@
#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_3_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v3_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v3_0_early_init(void *handle)
+static int jpeg_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING);
+ struct amdgpu_device *adev = ip_block->adev;
- if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
- return -ENOENT;
+ u32 harvest;
+
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
+ break;
+ default:
+ harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING);
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ return -ENOENT;
+ break;
+ }
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v3_0_set_dec_ring_funcs(adev);
jpeg_v3_0_set_irq_funcs(adev);
@@ -65,13 +91,13 @@ static int jpeg_v3_0_early_init(void *handle)
/**
* jpeg_v3_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v3_0_sw_init(void *handle)
+static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -89,37 +115,50 @@ static int jpeg_v3_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_3_0, ARRAY_SIZE(jpeg_reg_list_3_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v3_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v3_0_sw_fini(void *handle)
+static int jpeg_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -128,45 +167,36 @@ static int jpeg_v3_0_sw_fini(void *handle)
/**
* jpeg_v3_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v3_0_hw_init(void *handle)
+static int jpeg_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
-
- DRM_INFO("JPEG decode initialized successfully.\n");
-
- return 0;
+ return amdgpu_ring_test_helper(ring);
}
/**
* jpeg_v3_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v3_0_hw_fini(void *handle)
+static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
- ring = &adev->jpeg.inst->ring_dec;
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
-
- ring->sched.ready = false;
+ jpeg_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -174,20 +204,19 @@ static int jpeg_v3_0_hw_fini(void *handle)
/**
* jpeg_v3_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v3_0_suspend(void *handle)
+static int jpeg_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v3_0_hw_fini(adev);
+ r = jpeg_v3_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -195,20 +224,19 @@ static int jpeg_v3_0_suspend(void *handle)
/**
* jpeg_v3_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v3_0_resume(void *handle)
+static int jpeg_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v3_0_hw_init(adev);
+ r = jpeg_v3_0_hw_init(ip_block);
return r;
}
@@ -322,7 +350,7 @@ static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev)
*/
static int jpeg_v3_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
@@ -421,7 +449,7 @@ static uint64_t jpeg_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
}
@@ -438,16 +466,16 @@ static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
}
}
-static bool jpeg_v3_0_is_idle(void *handle)
+static bool jpeg_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
@@ -457,23 +485,23 @@ static bool jpeg_v3_0_is_idle(void *handle)
return ret;
}
-static int jpeg_v3_0_wait_for_idle(void *handle)
+static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v3_0_set_clockgating_state(void *handle,
+static int jpeg_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
if (enable) {
- if (!jpeg_v3_0_is_idle(handle))
+ if (!jpeg_v3_0_is_idle(ip_block))
return -EBUSY;
jpeg_v3_0_enable_clock_gating(adev);
} else {
@@ -483,10 +511,10 @@ static int jpeg_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if(state == adev->jpeg.cur_state)
@@ -519,7 +547,7 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -530,10 +558,25 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v3_0_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v3_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.name = "jpeg_v3_0",
.early_init = jpeg_v3_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v3_0_sw_init,
.sw_fini = jpeg_v3_0_sw_fini,
.hw_init = jpeg_v3_0_hw_init,
@@ -542,21 +585,19 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.resume = jpeg_v3_0_resume,
.is_idle = jpeg_v3_0_is_idle,
.wait_for_idle = jpeg_v3_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v3_0_set_clockgating_state,
.set_powergating_state = jpeg_v3_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -578,12 +619,12 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v3_0_ring_reset,
};
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v3_0_dec_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v3_0_dec_ring_vm_funcs;
}
static const struct amdgpu_irq_src_funcs jpeg_v3_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
new file mode 100644
index 000000000000..33db2c1ae6cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -0,0 +1,878 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0.h"
+#include "mmsch_v4_0.h"
+
+#include "vcn/vcn_4_0_0_offset.h"
+#include "vcn/vcn_4_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
+static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev);
+static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * jpeg_v4_0_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v4_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v4_0_set_dec_ring_funcs(adev);
+ jpeg_v4_0_set_irq_funcs(adev);
+ jpeg_v4_0_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = amdgpu_sriov_vf(adev) ? (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1);
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0, ARRAY_SIZE(jpeg_reg_list_4_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int jpeg_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v4_0_start_sriov(adev);
+ if (r)
+ return r;
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v4_0_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ } else {
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
+ jpeg_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+ }
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v4_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = jpeg_v4_0_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(ip_block->adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v4_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = amdgpu_jpeg_resume(ip_block->adev);
+ if (r)
+ return r;
+
+ r = jpeg_v4_0_hw_init(ip_block);
+
+ return r;
+}
+
+static void jpeg_v4_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK);
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v4_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK;
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+}
+
+static int jpeg_v4_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0,
+ regUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+ }
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v4_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* disable power gating */
+ r = jpeg_v4_0_disable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy(&header, (void *)table_loc, size);
+
+ header.version = MMSCH_VERSION;
+ header.total_size = RREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE);
+
+ header.jpegdec.init_status = 0;
+ header.jpegdec.table_offset = 0;
+ header.jpegdec.table_size = 0;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ ring = adev->jpeg.inst->ring_dec;
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_JRBC_RB_SIZE), ring->ring_size / 4);
+
+ /* add end packet */
+ MMSCH_V4_0_INSERT_END();
+
+ /* refine header */
+ header.jpegdec.init_status = 0;
+ header.jpegdec.table_offset = header.total_size;
+ header.jpegdec.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Update init table header in memory */
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ /* Perform HDP flush before writing to MMSCH registers */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* message MMSCH (in VCN[0]) to initialize this client
+ * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+ * of memory descriptor location
+ */
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* 2, update vmid of descriptor */
+ tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ size = header.total_size;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ /* 5, kick off the initialization and wait until
+ * MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ param = 0x00000001;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->jpegdec.init_status;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", resp, init_status);
+ return -EINVAL;
+ }
+
+ return 0;
+
+}
+
+/**
+ * jpeg_v4_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v4_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v4_0_enable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v4_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v4_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 1;
+
+ ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+
+ return ret;
+}
+
+static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+static int jpeg_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+
+ if (enable) {
+ if (!jpeg_v4_0_is_idle(ip_block))
+ return -EBUSY;
+ jpeg_v4_0_enable_clock_gating(adev);
+ } else {
+ jpeg_v4_0_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_stop(adev);
+ else
+ ret = jpeg_v4_0_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v4_0_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v4_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
+ .name = "jpeg_v4_0",
+ .early_init = jpeg_v4_0_early_init,
+ .sw_init = jpeg_v4_0_sw_init,
+ .sw_fini = jpeg_v4_0_sw_fini,
+ .hw_init = jpeg_v4_0_hw_init,
+ .hw_fini = jpeg_v4_0_hw_fini,
+ .suspend = jpeg_v4_0_suspend,
+ .resume = jpeg_v4_0_resume,
+ .is_idle = jpeg_v4_0_is_idle,
+ .wait_for_idle = jpeg_v4_0_wait_for_idle,
+ .set_clockgating_state = jpeg_v4_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v4_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_ring_reset,
+};
+
+static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_dec_ring_vm_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
+ .process = jpeg_v4_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_ras_irq_funcs = {
+ .set = jpeg_v4_0_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v4_0_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_ras_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v4_0_ip_funcs,
+};
+
+static uint32_t jpeg_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V4_0_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V4_0_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v4_0_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = {
+ .query_poison_status = jpeg_v4_0_query_ras_poison_status,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v4_0_ras_hw_ops,
+ .ras_late_init = amdgpu_jpeg_ras_late_init,
+ },
+};
+
+static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, JPEG_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ adev->jpeg.ras = &jpeg_v4_0_ras;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
new file mode 100644
index 000000000000..47638fd4d4e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_H__
+#define __JPEG_V4_0_H__
+
+enum amdgpu_jpeg_v4_0_sub_block {
+ AMDGPU_JPEG_V4_0_JPEG0 = 0,
+ AMDGPU_JPEG_V4_0_JPEG1,
+
+ AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
+#endif /* __JPEG_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
new file mode 100644
index 000000000000..aae7328973d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -0,0 +1,1486 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
+
+#include "vcn/vcn_4_0_3_offset.h"
+#include "vcn/vcn_4_0_3_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define NORMALIZE_JPEG_REG_OFFSET(offset) \
+ (offset & 0x1FFFF)
+
+enum jpeg_engin_status {
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0,
+ UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
+};
+
+static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_4_0__SRCID__JPEG_DECODE,
+ VCN_4_0__SRCID__JPEG1_DECODE,
+ VCN_4_0__SRCID__JPEG2_DECODE,
+ VCN_4_0__SRCID__JPEG3_DECODE,
+ VCN_4_0__SRCID__JPEG4_DECODE,
+ VCN_4_0__SRCID__JPEG5_DECODE,
+ VCN_4_0__SRCID__JPEG6_DECODE,
+ VCN_4_0__SRCID__JPEG7_DECODE
+};
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_3[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_SYS_INT_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+};
+
+static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
+{
+ return (adev->jpeg.caps & AMDGPU_JPEG_CAPS(RRMT_ENABLED)) == 0;
+}
+
+static inline int jpeg_v4_0_3_core_reg_offset(u32 pipe)
+{
+ if (pipe)
+ return ((0x40 * pipe) - 0xc80);
+ else
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;
+
+ jpeg_v4_0_3_set_dec_ring_funcs(adev);
+ jpeg_v4_0_3_set_irq_funcs(adev);
+ jpeg_v4_0_3_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+ }
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 9 * jpeg_inst;
+ } else {
+ if (j < 4)
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 4 + j + 32 * jpeg_inst;
+ else
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 8 + j + 32 * jpeg_inst;
+ }
+ sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[j] =
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[j] =
+ SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_SCRATCH0,
+ jpeg_v4_0_3_core_reg_offset(j));
+ }
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
+ return r;
+ }
+ }
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_3, ARRAY_SIZE(jpeg_reg_list_4_0_3));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw, item_offset;
+ uint32_t init_status;
+ int i, j, jpeg_inst;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_3_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ item_offset = header.total_size;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ table_size = 0;
+
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+ if (j <= 3) {
+ header.mjpegdec0[j].table_offset = item_offset;
+ header.mjpegdec0[j].init_status = 0;
+ header.mjpegdec0[j].table_size = table_size;
+ } else {
+ header.mjpegdec1[j - 4].table_offset = item_offset;
+ header.mjpegdec1[j - 4].init_status = 0;
+ header.mjpegdec1[j - 4].table_size = table_size;
+ }
+ header.total_size += table_size;
+ item_offset += table_size;
+ }
+
+ MMSCH_V4_0_INSERT_END();
+
+ /* send init table to MMSCH */
+ size = sizeof(struct mmsch_v4_0_3_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ init_status =
+ ((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+ init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+ resp, init_status);
+
+ }
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v4_0_3_start_sriov(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v4_0_3_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ } else {
+ /* This flag is not set for VF, assumed to be disabled always */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
+ 0x100)
+ adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ ring = adev->jpeg.inst[i].ring_dec;
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(
+ adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(
+ VCN, GET_INST(VCN, i),
+ regVCN_JPEG_DB_CTRL,
+ (ring->pipe ? (ring->pipe - 0x15) : 0),
+ ring->doorbell_index
+ << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) && !amdgpu_sriov_vf(adev))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+ return ret;
+}
+
+/**
+ * jpeg_v4_0_3_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = jpeg_v4_0_3_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(ip_block->adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_3_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = amdgpu_jpeg_resume(ip_block->adev);
+ if (r)
+ return r;
+
+ r = jpeg_v4_0_3_hw_init(ip_block);
+
+ return r;
+}
+
+static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ int i, jpeg_inst;
+ uint32_t data;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
+ for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
+ data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ int i, jpeg_inst;
+ uint32_t data;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
+ for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
+ data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v4_0_3_start_inst(struct amdgpu_device *adev, int inst)
+{
+ int jpeg_inst = GET_INST(JPEG, inst);
+
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
+ 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON <<
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_3_disable_clock_gating(adev, inst);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+static void jpeg_v4_0_3_start_jrbc(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe,
+ ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe));
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset);
+}
+
+/**
+ * jpeg_v4_0_3_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_v4_0_3_start_inst(adev, i);
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ jpeg_v4_0_3_start_jrbc(ring);
+ }
+ }
+
+ return 0;
+}
+
+static void jpeg_v4_0_3_stop_inst(struct amdgpu_device *adev, int inst)
+{
+ int jpeg_inst = GET_INST(JPEG, inst);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_3_enable_clock_gating(adev, inst);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+}
+
+/**
+ * jpeg_v4_0_3_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ jpeg_v4_0_3_stop_inst(adev, i);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe));
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe));
+}
+
+void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* JPEG engine access for HDP flush doesn't work when RRMT is enabled.
+ * This is a workaround to avoid any HDP flush through JPEG ring.
+ */
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe),
+ lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ if (!amdgpu_sriov_vf(ring->adev)) {
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+
+ amdgpu_ring_write(ring,
+ PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
+ 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80004000);
+ }
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ if (!amdgpu_sriov_vf(ring->adev)) {
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x62a04);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
+ 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00004000);
+ }
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ uint32_t reg_offset;
+
+ /* Use normalized offsets if required */
+ if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
+ reg = NORMALIZE_JPEG_REG_OFFSET(reg);
+
+ reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ uint32_t reg_offset;
+
+ /* Use normalized offsets if required */
+ if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
+ reg = NORMALIZE_JPEG_REG_OFFSET(reg);
+
+ reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static bool jpeg_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool ret = false;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j)) &
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+
+ return ret;
+}
+
+static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ret &= (SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j),
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+ }
+ }
+ return ret;
+}
+
+static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (enable) {
+ if (!jpeg_v4_0_3_is_idle(ip_block))
+ return -EBUSY;
+ jpeg_v4_0_3_enable_clock_gating(adev, i);
+ } else {
+ jpeg_v4_0_3_disable_clock_gating(adev, i);
+ }
+ }
+ return 0;
+}
+
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_3_stop(adev);
+ else
+ ret = jpeg_v4_0_3_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+ DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+ if (adev->jpeg.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->jpeg.num_jpeg_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown JPEG instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
+ break;
+ case VCN_4_0__SRCID__JPEG1_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
+ break;
+ case VCN_4_0__SRCID__JPEG2_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
+ break;
+ case VCN_4_0__SRCID__JPEG3_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
+ break;
+ case VCN_4_0__SRCID__JPEG4_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
+ break;
+ case VCN_4_0__SRCID__JPEG5_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
+ break;
+ case VCN_4_0__SRCID__JPEG6_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
+ break;
+ case VCN_4_0__SRCID__JPEG7_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe);
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x1F);
+ SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+ reg_offset, 0x1F, 0x1F);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x1F);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x00);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x00);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00);
+}
+
+static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ if (amdgpu_sriov_vf(ring->adev))
+ return -EOPNOTSUPP;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ jpeg_v4_0_3_core_stall_reset(ring);
+ jpeg_v4_0_3_start_jrbc(ring);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
+ .name = "jpeg_v4_0_3",
+ .early_init = jpeg_v4_0_3_early_init,
+ .sw_init = jpeg_v4_0_3_sw_init,
+ .sw_fini = jpeg_v4_0_3_sw_fini,
+ .hw_init = jpeg_v4_0_3_hw_init,
+ .hw_fini = jpeg_v4_0_3_hw_fini,
+ .suspend = jpeg_v4_0_3_suspend,
+ .resume = jpeg_v4_0_3_resume,
+ .is_idle = jpeg_v4_0_3_is_idle,
+ .wait_for_idle = jpeg_v4_0_3_wait_for_idle,
+ .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_3_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_3_ring_reset,
+};
+
+static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec[j].me = i;
+ adev->jpeg.inst[i].ring_dec[j].pipe = j;
+ }
+ jpeg_inst = GET_INST(JPEG, i);
+ adev->jpeg.inst[i].aid_id =
+ jpeg_inst / adev->jpeg.num_inst_per_aid;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
+ .set = jpeg_v4_0_3_set_interrupt_state,
+ .process = jpeg_v4_0_3_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_ras_irq_funcs = {
+ .set = jpeg_v4_0_3_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
+ }
+ adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_3_ras_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 3,
+ .funcs = &jpeg_v4_0_3_ip_funcs,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"},
+};
+
+static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t jpeg_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ /* jpeg v4_0_3 only support uncorrectable errors */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ jpeg_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+ NULL, 0, GET_INST(VCN, jpeg_inst),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ dev_warn(adev->dev, "JPEG RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
+ jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t jpeg_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ jpeg_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+ GET_INST(VCN, jpeg_inst));
+}
+
+static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ dev_warn(adev->dev, "JPEG RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
+ jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
+}
+
+static uint32_t jpeg_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V4_0_3_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V4_0_3_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v4_0_3_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v4_0_3_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
+ .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
+ .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
+ .query_poison_status = jpeg_v4_0_3_query_ras_poison_status,
+};
+
+static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int jpeg_v4_0_3_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ jpeg_v4_0_3_err_codes,
+ ARRAY_SIZE(jpeg_v4_0_3_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid,
+};
+
+static const struct aca_info jpeg_v4_0_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v4_0_3_aca_bank_ops,
+};
+
+static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->jpeg.inst->ras_poison_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+ &jpeg_v4_0_3_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v4_0_3_ras_hw_ops,
+ .ras_late_init = jpeg_v4_0_3_ras_late_init,
+ },
+};
+
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.ras = &jpeg_v4_0_3_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
new file mode 100644
index 000000000000..2e110d04af84
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_3_H__
+#define __JPEG_V4_0_3_H__
+
+#define regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x404d
+#define regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x404e
+#define regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x404f
+#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ab
+#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ac
+#define regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40a4
+#define regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40a6
+#define regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40b6
+#define regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40b7
+#define regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x42d4
+#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x42d5
+#define regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define regUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x4043
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
+enum amdgpu_jpeg_v4_0_3_sub_block {
+ AMDGPU_JPEG_V4_0_3_JPEG0 = 0,
+ AMDGPU_JPEG_V4_0_3_JPEG1,
+
+ AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
+
+void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags);
+void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags);
+void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr);
+void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
+void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+
+#endif /* __JPEG_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
new file mode 100644
index 000000000000..54fd9c800c40
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -0,0 +1,872 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_5.h"
+#include "mmsch_v4_0.h"
+
+#include "vcn/vcn_4_0_5_offset.h"
+#include "vcn/vcn_4_0_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+#define regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET 0x4026
+#define regJPEG_SYS_INT_EN_INTERNAL_OFFSET 0x4141
+#define regJPEG_CGC_CTRL_INTERNAL_OFFSET 0x4161
+#define regJPEG_CGC_GATE_INTERNAL_OFFSET 0x4160
+#define regUVD_NO_OP_INTERNAL_OFFSET 0x0029
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_5[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
+static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_clientid_jpeg[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+
+
+/**
+ * jpeg_v4_0_5_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v4_0_5_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ adev->jpeg.num_jpeg_inst = 1;
+ break;
+ case IP_VERSION(4, 0, 6):
+ adev->jpeg.num_jpeg_inst = 2;
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev,
+ "Failed to init vcn ip block(UVD_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
+ return -EINVAL;
+ }
+
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v4_0_5_set_dec_ring_funcs(adev);
+ jpeg_v4_0_5_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
+ sprintf(ring->name, "jpeg_dec_%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_PITCH);
+ }
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_5, ARRAY_SIZE(jpeg_reg_list_4_0_5));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_5_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int jpeg_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r = 0;
+
+ // TODO: Enable ring test with DPG support
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ return 0;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS))
+ jpeg_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+ }
+ }
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v4_0_5_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = jpeg_v4_0_5_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(ip_block->adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_5_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v4_0_5_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = amdgpu_jpeg_resume(ip_block->adev);
+ if (r)
+ return r;
+
+ r = jpeg_v4_0_5_hw_init(ip_block);
+
+ return r;
+}
+
+static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK);
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK;
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_engine_4_0_5_dpg_clock_gating_mode(struct amdgpu_device *adev,
+ int inst_idx, uint8_t indirect)
+{
+ uint32_t data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_CTRL_INTERNAL_OFFSET, data, indirect);
+
+ data = 0;
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_GATE_INTERNAL_OFFSET,
+ data, indirect);
+}
+
+static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG),
+ 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG),
+ 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_start_dpg_mode - Jpeg start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start JPEG block with dpg mode
+ */
+static void jpeg_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec;
+ uint32_t reg_data = 0;
+
+ /* enable anti hang mechanism */
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
+ reg_data |= 0x1;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst_idx, regUVD_IPX_DLDO_CONFIG),
+ 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst_idx, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ if (indirect)
+ adev->jpeg.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr;
+
+ jpeg_engine_4_0_5_dpg_clock_gating_mode(adev, inst_idx, indirect);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET,
+ adev->gfx.config.gb_addr_config, indirect);
+ /* enable System Interrupt for JRBC */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_SYS_INT_EN_INTERNAL_OFFSET,
+ JPEG_SYS_INT_EN__DJRBC_MASK, indirect);
+
+ /* add nop to workaround PSP size check */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regUVD_NO_OP_INTERNAL_OFFSET, 0, indirect);
+
+ if (indirect)
+ amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0);
+
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v4_0_5_stop_dpg_mode - Jpeg stop with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Stop JPEG block with dpg mode
+ */
+static void jpeg_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t reg_data = 0;
+
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+}
+
+/**
+ * jpeg_v4_0_5_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v4_0_5_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ /* doorbell programming is done for every playback */
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
+
+ WREG32_SOC15(VCN, i, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ jpeg_v4_0_5_start_dpg_mode(adev, i, adev->jpeg.indirect_sram);
+ continue;
+ }
+
+ /* disable power gating */
+ r = jpeg_v4_0_5_disable_static_power_gating(adev, i);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v4_0_5_stop(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ jpeg_v4_0_5_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_5_enable_clock_gating(adev, i);
+
+ /* enable power gating */
+ r = jpeg_v4_0_5_enable_static_power_gating(adev, i);
+ if (r)
+ return r;
+ }
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v4_0_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v4_0_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ret &= (((RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+ }
+ return ret;
+}
+
+static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ return SOC15_WAIT_ON_RREG(JPEG, i, regUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+
+ return 0;
+}
+
+static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (enable) {
+ if (!jpeg_v4_0_5_is_idle(ip_block))
+ return -EBUSY;
+
+ jpeg_v4_0_5_enable_clock_gating(adev, i);
+ } else {
+ jpeg_v4_0_5_disable_clock_gating(adev, i);
+ }
+ }
+
+ return 0;
+}
+
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_5_stop(adev);
+ else
+ ret = jpeg_v4_0_5_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec);
+ break;
+ case VCN_4_0__SRCID_DJPEG0_POISON:
+ case VCN_4_0__SRCID_EJPEG0_POISON:
+ amdgpu_jpeg_process_poison_irq(adev, source, entry);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int jpeg_v4_0_5_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v4_0_5_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v4_0_5_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = {
+ .name = "jpeg_v4_0_5",
+ .early_init = jpeg_v4_0_5_early_init,
+ .sw_init = jpeg_v4_0_5_sw_init,
+ .sw_fini = jpeg_v4_0_5_sw_fini,
+ .hw_init = jpeg_v4_0_5_hw_init,
+ .hw_fini = jpeg_v4_0_5_hw_fini,
+ .suspend = jpeg_v4_0_5_suspend,
+ .resume = jpeg_v4_0_5_resume,
+ .is_idle = jpeg_v4_0_5_is_idle,
+ .wait_for_idle = jpeg_v4_0_5_wait_for_idle,
+ .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_5_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v4_0_5_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_5_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_5_ring_reset,
+};
+
+static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec->me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = {
+ .process = jpeg_v4_0_5_process_interrupt,
+};
+
+static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ adev->jpeg.inst[i].irq.num_types = 1;
+ adev->jpeg.inst[i].irq.funcs = &jpeg_v4_0_5_irq_funcs;
+ }
+}
+
+const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 5,
+ .funcs = &jpeg_v4_0_5_ip_funcs,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h
new file mode 100644
index 000000000000..c5eee572079c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_5_H__
+#define __JPEG_V4_0_5_H__
+
+enum amdgpu_jpeg_v4_0_5_sub_block {
+ AMDGPU_JPEG_V4_0_5_JPEG0 = 0,
+
+ AMDGPU_JPEG_V4_0_5_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block;
+
+#endif /* __JPEG_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
new file mode 100644
index 000000000000..46bf15dce2bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -0,0 +1,733 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_3.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "jpeg_v5_0_0.h"
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
+static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+
+/**
+ * jpeg_v5_0_0_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v5_0_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v5_0_0_set_dec_ring_funcs(adev);
+ jpeg_v5_0_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0, ARRAY_SIZE(jpeg_reg_list_5_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_0_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_0_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int jpeg_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ /* Skip ring test because pause DPG is not implemented. */
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)
+ return 0;
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
+ jpeg_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v5_0_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = jpeg_v5_0_0_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(ip_block->adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_0_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v5_0_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = amdgpu_jpeg_resume(ip_block->adev);
+ if (r)
+ return r;
+
+ r = jpeg_v5_0_0_hw_init(ip_block);
+
+ return r;
+}
+
+static void jpeg_v5_0_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+ data &= ~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JPEG_ENC_MODE_MASK);
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+}
+
+static void jpeg_v5_0_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+
+ data |= 1 << JPEG_CGC_CTRL__JPEG0_DEC_MODE__SHIFT;
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+}
+
+static int jpeg_v5_0_0_disable_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(JPEG, 0, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ return 0;
+}
+
+static int jpeg_v5_0_0_enable_power_gating(struct amdgpu_device *adev)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG),
+ 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+static void jpeg_engine_5_0_0_dpg_clock_gating_mode(struct amdgpu_device *adev,
+ int inst_idx, uint8_t indirect)
+{
+ uint32_t data = 0;
+
+ // JPEG disable CGC
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data = 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+
+ if (indirect) {
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_CTRL, data, indirect);
+
+ // Turn on All JPEG clocks
+ data = 0;
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_GATE, data, indirect);
+ } else {
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_CTRL, data, indirect);
+
+ // Turn on All JPEG clocks
+ data = 0;
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_GATE, data, indirect);
+ }
+}
+
+/**
+ * jpeg_v5_0_0_start_dpg_mode - Jpeg start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start JPEG block with dpg mode
+ */
+static int jpeg_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec;
+ uint32_t reg_data = 0;
+
+ jpeg_v5_0_0_enable_power_gating(adev);
+
+ // enable dynamic power gating mode
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ if (indirect)
+ adev->jpeg.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr;
+
+ jpeg_engine_5_0_0_dpg_clock_gating_mode(adev, inst_idx, indirect);
+
+ /* MJPEG global tiling registers */
+ if (indirect)
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, indirect);
+ else
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 1);
+
+ /* enable System Interrupt for JRBC */
+ if (indirect)
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_SYS_INT_EN,
+ JPEG_SYS_INT_EN__DJRBC0_MASK, indirect);
+ else
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_SYS_INT_EN,
+ JPEG_SYS_INT_EN__DJRBC0_MASK, 1);
+
+ if (indirect) {
+ /* add nop to workaround PSP size check */
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipUVD_NO_OP, 0, indirect);
+
+ amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0);
+ }
+
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_stop_dpg_mode - Jpeg stop with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Stop JPEG block with dpg mode
+ */
+static void jpeg_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t reg_data = 0;
+
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+}
+
+/**
+ * jpeg_v5_0_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v5_0_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ r = jpeg_v5_0_0_start_dpg_mode(adev, 0, adev->jpeg.indirect_sram);
+ return r;
+ }
+
+ /* disable power gating */
+ r = jpeg_v5_0_0_disable_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v5_0_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC0_MASK);
+
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v5_0_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ jpeg_v5_0_0_stop_dpg_mode(adev, 0);
+ } else {
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v5_0_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v5_0_0_enable_power_gating(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v5_0_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v5_0_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v5_0_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 1;
+
+ ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+
+ return ret;
+}
+
+static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+
+ if (enable) {
+ if (!jpeg_v5_0_0_is_idle(ip_block))
+ return -EBUSY;
+ jpeg_v5_0_0_enable_clock_gating(adev);
+ } else {
+ jpeg_v5_0_0_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_0_stop(adev);
+ else
+ ret = jpeg_v5_0_0_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v5_0_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int jpeg_v5_0_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v5_0_0_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v5_0_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = {
+ .name = "jpeg_v5_0_0",
+ .early_init = jpeg_v5_0_0_early_init,
+ .sw_init = jpeg_v5_0_0_sw_init,
+ .sw_fini = jpeg_v5_0_0_sw_fini,
+ .hw_init = jpeg_v5_0_0_hw_init,
+ .hw_fini = jpeg_v5_0_0_hw_fini,
+ .suspend = jpeg_v5_0_0_suspend,
+ .resume = jpeg_v5_0_0_resume,
+ .is_idle = jpeg_v5_0_0_is_idle,
+ .wait_for_idle = jpeg_v5_0_0_wait_for_idle,
+ .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_0_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v5_0_0_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v5_0_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v5_0_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v5_0_0_ring_reset,
+};
+
+static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_0_0_dec_ring_vm_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_0_irq_funcs = {
+ .set = jpeg_v5_0_0_set_interrupt_state,
+ .process = jpeg_v5_0_0_process_interrupt,
+};
+
+static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v5_0_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v5_0_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h
new file mode 100644
index 000000000000..5abb96159814
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_0_H__
+#define __JPEG_V5_0_0_H__
+
+#define vcnipJPEG_CGC_GATE 0x4160
+#define vcnipJPEG_CGC_CTRL 0x4161
+#define vcnipJPEG_SYS_INT_EN 0x4141
+#define vcnipUVD_NO_OP 0x0029
+#define vcnipJPEG_DEC_GFX10_ADDR_CONFIG 0x404A
+
+extern const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block;
+
+#endif /* __JPEG_V5_0_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
new file mode 100644
index 000000000000..ab0bf880d3d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
@@ -0,0 +1,1101 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+#include "jpeg_v5_0_1.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+
+static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_5_0__SRCID__JPEG_DECODE,
+ VCN_5_0__SRCID__JPEG1_DECODE,
+ VCN_5_0__SRCID__JPEG2_DECODE,
+ VCN_5_0__SRCID__JPEG3_DECODE,
+ VCN_5_0__SRCID__JPEG4_DECODE,
+ VCN_5_0__SRCID__JPEG5_DECODE,
+ VCN_5_0__SRCID__JPEG6_DECODE,
+ VCN_5_0__SRCID__JPEG7_DECODE,
+ VCN_5_0__SRCID__JPEG8_DECODE,
+ VCN_5_0__SRCID__JPEG9_DECODE,
+};
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0_1[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_STATUS),
+};
+
+static int jpeg_v5_0_1_core_reg_offset(u32 pipe)
+{
+ if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3)
+ return ((0x40 * pipe) - 0xc80);
+ else
+ return ((0x40 * pipe) - 0x440);
+}
+
+/**
+ * jpeg_v5_0_1_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES)
+ return -ENOENT;
+
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+ jpeg_v5_0_1_set_dec_ring_funcs(adev);
+ jpeg_v5_0_1_set_irq_funcs(adev);
+ jpeg_v5_0_1_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+ }
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 11 * jpeg_inst;
+ } else {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 2 + j + 32 * jpeg_inst;
+ }
+ sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[j] =
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[j] =
+ SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0,
+ (j ? jpeg_v5_0_1_core_reg_offset(j) : 0));
+ }
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
+ return r;
+ }
+ }
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_1, ARRAY_SIZE(jpeg_reg_list_5_0_1));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_1_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_1_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v5_0_1_start_sriov(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v5_0_1_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ return 0;
+ }
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+ adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+ ring = adev->jpeg.inst[i].ring_dec;
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL,
+ ring->pipe,
+ ring->doorbell_index <<
+ VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) && !amdgpu_sriov_vf(adev))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+ return ret;
+}
+
+/**
+ * jpeg_v5_0_1_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = jpeg_v5_0_1_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_1_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v5_0_1_hw_init(ip_block);
+
+ return r;
+}
+
+static void jpeg_v5_0_1_init_inst(struct amdgpu_device *adev, int i)
+{
+ int jpeg_inst = GET_INST(JPEG, i);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+static void jpeg_v5_0_1_deinit_inst(struct amdgpu_device *adev, int i)
+{
+ int jpeg_inst = GET_INST(JPEG, i);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
+static void jpeg_v5_0_1_init_jrbc(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 reg, data, mask;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+ /* enable System Interrupt for JRBC */
+ reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN);
+ if (ring->pipe < AMDGPU_MAX_JPEG_RINGS_4_0_3) {
+ data = JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe;
+ mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe);
+ WREG32_P(reg, data, mask);
+ } else {
+ data = JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12);
+ mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12));
+ WREG32_P(reg, data, mask);
+ }
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR,
+ reg_offset);
+}
+
+static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw, item_offset;
+ uint32_t init_status;
+ int i, j, jpeg_inst;
+
+ struct mmsch_v5_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v5_0_cmd_end end = { {0} };
+ struct mmsch_v5_0_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ item_offset = header.total_size;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ table_size = 0;
+
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC_RB_SIZE);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+ if (j < 5) {
+ header.mjpegdec0[j].table_offset = item_offset;
+ header.mjpegdec0[j].init_status = 0;
+ header.mjpegdec0[j].table_size = table_size;
+ } else {
+ header.mjpegdec1[j - 5].table_offset = item_offset;
+ header.mjpegdec1[j - 5].init_status = 0;
+ header.mjpegdec1[j - 5].table_size = table_size;
+ }
+ header.total_size += table_size;
+ item_offset += table_size;
+ }
+
+ MMSCH_V5_0_INSERT_END();
+
+ /* send init table to MMSCH */
+ size = sizeof(struct mmsch_v5_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ init_status =
+ ((struct mmsch_v5_0_init_header *)(table_loc))->mjpegdec0[i].init_status;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+ init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+ resp, init_status);
+
+ }
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v5_0_1_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_v5_0_1_init_inst(adev, i);
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ jpeg_v5_0_1_init_jrbc(ring);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v5_0_1_stop(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ jpeg_v5_0_1_deinit_inst(adev, i);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR,
+ ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR,
+ ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+ regUVD_JRBC_RB_WPTR,
+ (ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0),
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool ret = false;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+ ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC_STATUS, reg_offset) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+
+ return ret;
+}
+
+static int jpeg_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+ ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC_STATUS, reg_offset,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+ return ret;
+}
+
+static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+
+ int i;
+
+ if (!enable)
+ return 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (!jpeg_v5_0_1_is_idle(ip_block))
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_1_stop(adev);
+ else
+ ret = jpeg_v5_0_1_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+
+
+static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+ DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+ if (adev->jpeg.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->jpeg.num_jpeg_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown JPEG instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
+ break;
+ case VCN_5_0__SRCID__JPEG1_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
+ break;
+ case VCN_5_0__SRCID__JPEG2_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
+ break;
+ case VCN_5_0__SRCID__JPEG3_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
+ break;
+ case VCN_5_0__SRCID__JPEG4_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
+ break;
+ case VCN_5_0__SRCID__JPEG5_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
+ break;
+ case VCN_5_0__SRCID__JPEG6_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
+ break;
+ case VCN_5_0__SRCID__JPEG7_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
+ break;
+ case VCN_5_0__SRCID__JPEG8_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]);
+ break;
+ case VCN_5_0__SRCID__JPEG9_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static void jpeg_v5_0_1_core_stall_reset(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x1F);
+ SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+ reg_offset, 0x1F, 0x1F);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x1F);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x00);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x00);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00);
+}
+
+static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ jpeg_v5_0_1_core_stall_reset(ring);
+ jpeg_v5_0_1_init_jrbc(ring);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = {
+ .name = "jpeg_v5_0_1",
+ .early_init = jpeg_v5_0_1_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v5_0_1_sw_init,
+ .sw_fini = jpeg_v5_0_1_sw_fini,
+ .hw_init = jpeg_v5_0_1_hw_init,
+ .hw_fini = jpeg_v5_0_1_hw_fini,
+ .suspend = jpeg_v5_0_1_suspend,
+ .resume = jpeg_v5_0_1_resume,
+ .is_idle = jpeg_v5_0_1_is_idle,
+ .wait_for_idle = jpeg_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v5_0_1_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_0_1_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v5_0_1_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v5_0_1_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v5_0_1_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v5_0_1_ring_reset,
+};
+
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_1_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec[j].me = i;
+ adev->jpeg.inst[i].ring_dec[j].pipe = j;
+ }
+ jpeg_inst = GET_INST(JPEG, i);
+ adev->jpeg.inst[i].aid_id =
+ jpeg_inst / adev->jpeg.num_inst_per_aid;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = {
+ .set = jpeg_v5_0_1_set_interrupt_state,
+ .process = jpeg_v5_0_1_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_ras_irq_funcs = {
+ .set = jpeg_v5_0_1_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
+
+ adev->jpeg.inst->irq.funcs = &jpeg_v5_0_1_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v5_0_1_ras_irq_funcs;
+
+}
+
+const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 0,
+ .rev = 1,
+ .funcs = &jpeg_v5_0_1_ip_funcs,
+};
+
+static uint32_t jpeg_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V5_0_1_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V5_0_1_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v5_0_1_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v5_0_1_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+static const struct amdgpu_ras_block_hw_ops jpeg_v5_0_1_ras_hw_ops = {
+ .query_poison_status = jpeg_v5_0_1_query_ras_poison_status,
+};
+
+static int jpeg_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int jpeg_v5_0_1_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-9][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 48, 49, 50, 51,
+};
+
+static bool jpeg_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ jpeg_v5_0_1_err_codes,
+ ARRAY_SIZE(jpeg_v5_0_1_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops jpeg_v5_0_1_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v5_0_1_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v5_0_1_aca_bank_is_valid,
+};
+
+static const struct aca_info jpeg_v5_0_1_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v5_0_1_aca_bank_ops,
+};
+
+static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+ &jpeg_v5_0_1_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->jpeg.inst->ras_poison_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+static struct amdgpu_jpeg_ras jpeg_v5_0_1_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v5_0_1_ras_hw_ops,
+ .ras_late_init = jpeg_v5_0_1_ras_late_init,
+ },
+};
+
+static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.ras = &jpeg_v5_0_1_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
new file mode 100644
index 000000000000..a7e58d5fb246
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_1_H__
+#define __JPEG_V5_0_1_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block;
+
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR 0x0640
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_STATUS 0x0649
+#define regUVD_JRBC0_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR 0x064a
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR 0x0000
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_STATUS 0x0009
+#define regUVD_JRBC1_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR 0x000a
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR 0x0040
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_STATUS 0x0049
+#define regUVD_JRBC2_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR 0x004a
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR 0x0080
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_STATUS 0x0089
+#define regUVD_JRBC3_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR 0x008a
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR 0x00c0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_STATUS 0x00c9
+#define regUVD_JRBC4_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR 0x00ca
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR 0x0100
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_STATUS 0x0109
+#define regUVD_JRBC5_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR 0x010a
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR 0x0140
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_STATUS 0x0149
+#define regUVD_JRBC6_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR 0x014a
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR 0x0180
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_STATUS 0x0189
+#define regUVD_JRBC7_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR 0x018a
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR 0x01c0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_STATUS 0x01c9
+#define regUVD_JRBC8_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR 0x01ca
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR 0x0440
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_STATUS 0x0449
+#define regUVD_JRBC9_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR 0x044a
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JMI0_JPEG_LMI_DROP 0x0663
+#define regUVD_JMI0_JPEG_LMI_DROP_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL 0x067a
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS 0x067b
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS_BASE_IDX 1
+#define regJPEG_CORE_RST_CTRL 0x072e
+#define regJPEG_CORE_RST_CTRL_BASE_IDX 1
+
+#define regVCN_RRMT_CNTL 0x0940
+#define regVCN_RRMT_CNTL_BASE_IDX 1
+
+enum amdgpu_jpeg_v5_0_1_sub_block {
+ AMDGPU_JPEG_V5_0_1_JPEG0 = 0,
+ AMDGPU_JPEG_V5_0_1_JPEG1,
+
+ AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK,
+};
+
+#endif /* __JPEG_V5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
new file mode 100644
index 000000000000..1a285b531881
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include "amdgpu.h"
+#include "lsdma_v6_0.h"
+#include "amdgpu_lsdma.h"
+
+#include "lsdma/lsdma_6_0_0_offset.h"
+#include "lsdma/lsdma_6_0_0_sh_mask.h"
+
+static int lsdma_v6_0_wait_pio_status(struct amdgpu_device *adev)
+{
+ return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS),
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK,
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK);
+}
+
+static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v6_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
+
+ return ret;
+}
+
+static int lsdma_v6_0_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data);
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v6_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n");
+
+ return ret;
+}
+
+static void lsdma_v6_0_update_memory_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL);
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+}
+
+const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = {
+ .copy_mem = lsdma_v6_0_copy_mem,
+ .fill_mem = lsdma_v6_0_fill_mem,
+ .update_memory_power_gating = lsdma_v6_0_update_memory_power_gating
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h
new file mode 100644
index 000000000000..3ef79be1a9bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __LSDMA_V6_0_H__
+#define __LSDMA_V6_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs;
+
+#endif /* __LSDMA_V6_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c
new file mode 100644
index 000000000000..396262044ea8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include "amdgpu.h"
+#include "lsdma_v7_0.h"
+#include "amdgpu_lsdma.h"
+
+#include "lsdma/lsdma_7_0_0_offset.h"
+#include "lsdma/lsdma_7_0_0_sh_mask.h"
+
+static int lsdma_v7_0_wait_pio_status(struct amdgpu_device *adev)
+{
+ return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS),
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK,
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK);
+}
+
+static int lsdma_v7_0_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v7_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
+
+ return ret;
+}
+
+static int lsdma_v7_0_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data);
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v7_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n");
+
+ return ret;
+}
+
+static void lsdma_v7_0_update_memory_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL);
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+}
+
+const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs = {
+ .copy_mem = lsdma_v7_0_copy_mem,
+ .fill_mem = lsdma_v7_0_fill_mem,
+ .update_memory_power_gating = lsdma_v7_0_update_memory_power_gating
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h
new file mode 100644
index 000000000000..52b4485cdd98
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __LSDMA_V7_0_H__
+#define __LSDMA_V7_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs;
+
+#endif /* __LSDMA_V7_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
new file mode 100644
index 000000000000..6dae4a2e2767
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu_ras.h"
+#include "amdgpu.h"
+#include "amdgpu_mca.h"
+
+#define smnMCMP0_STATUST0 0x03830408
+#define smnMCMP1_STATUST0 0x03b30408
+#define smnMCMPIO_STATUST0 0x0c930408
+
+
+static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_mca_query_ras_error_count(adev,
+ smnMCMP0_STATUST0,
+ ras_error_status);
+}
+
+static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block, uint32_t sub_block_index)
+{
+ if (!block_obj)
+ return -EINVAL;
+
+ if ((block_obj->ras_comm.block == block) &&
+ (block_obj->ras_comm.sub_block_index == sub_block_index)) {
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
+ .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
+ .query_ras_error_address = NULL,
+};
+
+struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mp0_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
+};
+
+static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_mca_query_ras_error_count(adev,
+ smnMCMP1_STATUST0,
+ ras_error_status);
+}
+
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
+ .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
+ .query_ras_error_address = NULL,
+};
+
+struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mp1_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
+};
+
+static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_mca_query_ras_error_count(adev,
+ smnMCMPIO_STATUST0,
+ ras_error_status);
+}
+
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
+ .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
+ .query_ras_error_address = NULL,
+};
+
+struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mpio_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
new file mode 100644
index 000000000000..d3eaef0d7f2d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __MCA_V3_0_H__
+#define __MCA_V3_0_H__
+
+extern struct amdgpu_mca_ras_block mca_v3_0_mp0_ras;
+extern struct amdgpu_mca_ras_block mca_v3_0_mp1_ras;
+extern struct amdgpu_mca_ras_block mca_v3_0_mpio_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h b/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
deleted file mode 100644
index 3f4fca5fd1da..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __MES_API_DEF_H__
-#define __MES_API_DEF_H__
-
-#pragma pack(push, 4)
-
-#define MES_API_VERSION 1
-
-/* Driver submits one API(cmd) as a single Frame and this command size is same
- * for all API to ease the debugging and parsing of ring buffer.
- */
-enum { API_FRAME_SIZE_IN_DWORDS = 64 };
-
-/* To avoid command in scheduler context to be overwritten whenenver mutilple
- * interrupts come in, this creates another queue.
- */
-enum { API_NUMBER_OF_COMMAND_MAX = 32 };
-
-enum MES_API_TYPE {
- MES_API_TYPE_SCHEDULER = 1,
- MES_API_TYPE_MAX
-};
-
-enum MES_SCH_API_OPCODE {
- MES_SCH_API_SET_HW_RSRC = 0,
- MES_SCH_API_SET_SCHEDULING_CONFIG = 1, /* agreegated db, quantums, etc */
- MES_SCH_API_ADD_QUEUE = 2,
- MES_SCH_API_REMOVE_QUEUE = 3,
- MES_SCH_API_PERFORM_YIELD = 4,
- MES_SCH_API_SET_GANG_PRIORITY_LEVEL = 5,
- MES_SCH_API_SUSPEND = 6,
- MES_SCH_API_RESUME = 7,
- MES_SCH_API_RESET = 8,
- MES_SCH_API_SET_LOG_BUFFER = 9,
- MES_SCH_API_CHANGE_GANG_PRORITY = 10,
- MES_SCH_API_QUERY_SCHEDULER_STATUS = 11,
- MES_SCH_API_PROGRAM_GDS = 12,
- MES_SCH_API_SET_DEBUG_VMID = 13,
- MES_SCH_API_MISC = 14,
- MES_SCH_API_MAX = 0xFF
-};
-
-union MES_API_HEADER {
- struct {
- uint32_t type : 4; /* 0 - Invalid; 1 - Scheduling; 2 - TBD */
- uint32_t opcode : 8;
- uint32_t dwsize : 8; /* including header */
- uint32_t reserved : 12;
- };
-
- uint32_t u32All;
-};
-
-enum MES_AMD_PRIORITY_LEVEL {
- AMD_PRIORITY_LEVEL_LOW = 0,
- AMD_PRIORITY_LEVEL_NORMAL = 1,
- AMD_PRIORITY_LEVEL_MEDIUM = 2,
- AMD_PRIORITY_LEVEL_HIGH = 3,
- AMD_PRIORITY_LEVEL_REALTIME = 4,
- AMD_PRIORITY_NUM_LEVELS
-};
-
-enum MES_QUEUE_TYPE {
- MES_QUEUE_TYPE_GFX,
- MES_QUEUE_TYPE_COMPUTE,
- MES_QUEUE_TYPE_SDMA,
- MES_QUEUE_TYPE_MAX,
-};
-
-struct MES_API_STATUS {
- uint64_t api_completion_fence_addr;
- uint64_t api_completion_fence_value;
-};
-
-enum { MAX_COMPUTE_PIPES = 8 };
-enum { MAX_GFX_PIPES = 2 };
-enum { MAX_SDMA_PIPES = 2 };
-
-enum { MAX_COMPUTE_HQD_PER_PIPE = 8 };
-enum { MAX_GFX_HQD_PER_PIPE = 8 };
-enum { MAX_SDMA_HQD_PER_PIPE = 10 };
-
-enum { MAX_QUEUES_IN_A_GANG = 8 };
-
-enum VM_HUB_TYPE {
- VM_HUB_TYPE_GC = 0,
- VM_HUB_TYPE_MM = 1,
- VM_HUB_TYPE_MAX,
-};
-
-enum { VMID_INVALID = 0xffff };
-
-enum { MAX_VMID_GCHUB = 16 };
-enum { MAX_VMID_MMHUB = 16 };
-
-enum MES_LOG_OPERATION {
- MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
-};
-
-enum MES_LOG_CONTEXT_STATE {
- MES_LOG_CONTEXT_STATE_IDLE = 0,
- MES_LOG_CONTEXT_STATE_RUNNING = 1,
- MES_LOG_CONTEXT_STATE_READY = 2,
- MES_LOG_CONTEXT_STATE_READY_STANDBY = 3,
-};
-
-struct MES_LOG_CONTEXT_STATE_CHANGE {
- void *h_context;
- enum MES_LOG_CONTEXT_STATE new_context_state;
-};
-
-struct MES_LOG_ENTRY_HEADER {
- uint32_t first_free_entry_index;
- uint32_t wraparound_count;
- uint64_t number_of_entries;
- uint64_t reserved[2];
-};
-
-struct MES_LOG_ENTRY_DATA {
- uint64_t gpu_time_stamp;
- uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */
- uint32_t reserved_operation_type_bits;
- union {
- struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
- uint64_t reserved_operation_data[2];
- };
-};
-
-struct MES_LOG_BUFFER {
- struct MES_LOG_ENTRY_HEADER header;
- struct MES_LOG_ENTRY_DATA entries[1];
-};
-
-union MESAPI_SET_HW_RESOURCES {
- struct {
- union MES_API_HEADER header;
- uint32_t vmid_mask_mmhub;
- uint32_t vmid_mask_gfxhub;
- uint32_t gds_size;
- uint32_t paging_vmid;
- uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES];
- uint32_t gfx_hqd_mask[MAX_GFX_PIPES];
- uint32_t sdma_hqd_mask[MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
- uint64_t g_sch_ctx_gpu_mc_ptr;
- uint64_t query_status_fence_gpu_mc_ptr;
- struct MES_API_STATUS api_status;
- union {
- struct {
- uint32_t disable_reset : 1;
- uint32_t reserved : 31;
- };
- uint32_t uint32_t_all;
- };
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__ADD_QUEUE {
- struct {
- union MES_API_HEADER header;
- uint32_t process_id;
- uint64_t page_table_base_addr;
- uint64_t process_va_start;
- uint64_t process_va_end;
- uint64_t process_quantum;
- uint64_t process_context_addr;
- uint64_t gang_quantum;
- uint64_t gang_context_addr;
- uint32_t inprocess_gang_priority;
- enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
- uint32_t doorbell_offset;
- uint64_t mqd_addr;
- uint64_t wptr_addr;
- enum MES_QUEUE_TYPE queue_type;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
-
- struct {
- uint32_t paging : 1;
- uint32_t debug_vmid : 4;
- uint32_t program_gds : 1;
- uint32_t is_gang_suspended : 1;
- uint32_t is_tmz_queue : 1;
- uint32_t reserved : 24;
- };
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__REMOVE_QUEUE {
- struct {
- union MES_API_HEADER header;
- uint32_t doorbell_offset;
- uint64_t gang_context_addr;
-
- struct {
- uint32_t unmap_legacy_gfx_queue : 1;
- uint32_t reserved : 31;
- };
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_SCHEDULING_CONFIG {
- struct {
- union MES_API_HEADER header;
- /* Grace period when preempting another priority band for this
- * priority band. The value for idle priority band is ignored,
- * as it never preempts other bands.
- */
- uint64_t grace_period_other_levels[AMD_PRIORITY_NUM_LEVELS];
- /* Default quantum for scheduling across processes within
- * a priority band.
- */
- uint64_t process_quantum_for_level[AMD_PRIORITY_NUM_LEVELS];
- /* Default grace period for processes that preempt each other
- * within a priority band.
- */
- uint64_t process_grace_period_same_level[AMD_PRIORITY_NUM_LEVELS];
- /* For normal level this field specifies the target GPU
- * percentage in situations when it's starved by the high level.
- * Valid values are between 0 and 50, with the default being 10.
- */
- uint32_t normal_yield_percent;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__PERFORM_YIELD {
- struct {
- union MES_API_HEADER header;
- uint32_t dummy;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__CHANGE_GANG_PRIORITY_LEVEL {
- struct {
- union MES_API_HEADER header;
- uint32_t inprocess_gang_priority;
- enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
- uint64_t gang_quantum;
- uint64_t gang_context_addr;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SUSPEND {
- struct {
- union MES_API_HEADER header;
- /* false - suspend all gangs; true - specific gang */
- struct {
- uint32_t suspend_all_gangs : 1;
- uint32_t reserved : 31;
- };
- /* gang_context_addr is valid only if suspend_all = false */
- uint64_t gang_context_addr;
-
- uint64_t suspend_fence_addr;
- uint32_t suspend_fence_value;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__RESUME {
- struct {
- union MES_API_HEADER header;
- /* false - resume all gangs; true - specified gang */
- struct {
- uint32_t resume_all_gangs : 1;
- uint32_t reserved : 31;
- };
- /* valid only if resume_all_gangs = false */
- uint64_t gang_context_addr;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__RESET {
- struct {
- union MES_API_HEADER header;
-
- struct {
- uint32_t reset_queue : 1;
- uint32_t reserved : 31;
- };
-
- uint64_t gang_context_addr;
- uint32_t doorbell_offset; /* valid only if reset_queue = true */
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_LOGGING_BUFFER {
- struct {
- union MES_API_HEADER header;
- /* There are separate log buffers for each queue type */
- enum MES_QUEUE_TYPE log_type;
- /* Log buffer GPU Address */
- uint64_t logging_buffer_addr;
- /* number of entries in the log buffer */
- uint32_t number_of_entries;
- /* Entry index at which CPU interrupt needs to be signalled */
- uint32_t interrupt_entry;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__QUERY_MES_STATUS {
- struct {
- union MES_API_HEADER header;
- bool mes_healthy; /* 0 - not healthy, 1 - healthy */
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__PROGRAM_GDS {
- struct {
- union MES_API_HEADER header;
- uint64_t process_context_addr;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_DEBUG_VMID {
- struct {
- union MES_API_HEADER header;
- struct MES_API_STATUS api_status;
- union {
- struct {
- uint32_t use_gds : 1;
- uint32_t reserved : 31;
- } flags;
- uint32_t u32All;
- };
- uint32_t reserved;
- uint32_t debug_vmid;
- uint64_t process_context_addr;
- uint64_t page_table_base_addr;
- uint64_t process_va_start;
- uint64_t process_va_end;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-enum MESAPI_MISC_OPCODE {
- MESAPI_MISC__MODIFY_REG,
- MESAPI_MISC__MAX,
-};
-
-enum MODIFY_REG_SUBCODE {
- MODIFY_REG__OVERWRITE,
- MODIFY_REG__RMW_OR,
- MODIFY_REG__RMW_AND,
- MODIFY_REG__MAX,
-};
-
-enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
-
-union MESAPI__MISC {
- struct {
- union MES_API_HEADER header;
- enum MESAPI_MISC_OPCODE opcode;
- struct MES_API_STATUS api_status;
-
- union {
- struct {
- enum MODIFY_REG_SUBCODE subcode;
- uint32_t reg_offset;
- uint32_t reg_value;
- } modify_reg;
- uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS];
- };
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-#pragma pack(pop)
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
new file mode 100644
index 000000000000..64cae89357b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
+#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
+
+static int
+mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
+{
+ int ret;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+ goto err_reserve_bo_failed;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (ret) {
+ DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+ goto err_map_bo_gart_failed;
+ }
+
+ amdgpu_bo_unreserve(bo);
+ bo = amdgpu_bo_ref(bo);
+
+ return 0;
+
+err_map_bo_gart_failed:
+ amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+ return ret;
+}
+
+static int
+mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ uint64_t wptr)
+{
+ struct amdgpu_bo_va_mapping *wptr_mapping;
+ struct amdgpu_vm *wptr_vm;
+ struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
+ int ret;
+
+ wptr_vm = queue->vm;
+ ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+ if (ret)
+ return ret;
+
+ wptr &= AMDGPU_GMC_HOLE_MASK;
+ wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
+ amdgpu_bo_unreserve(wptr_vm->root.bo);
+ if (!wptr_mapping) {
+ DRM_ERROR("Failed to lookup wptr bo\n");
+ return -EINVAL;
+ }
+
+ wptr_obj->obj = wptr_mapping->bo_va->base.bo;
+ if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+ DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
+ return -EINVAL;
+ }
+
+ ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
+ if (ret) {
+ DRM_ERROR("Failed to map wptr bo to GART\n");
+ return ret;
+ }
+
+ queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
+ return 0;
+}
+
+static int convert_to_mes_priority(int priority)
+{
+ switch (priority) {
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW:
+ default:
+ return AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW:
+ return AMDGPU_MES_PRIORITY_LEVEL_LOW;
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH:
+ return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM;
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH:
+ return AMDGPU_MES_PRIORITY_LEVEL_HIGH;
+ }
+}
+
+static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ struct amdgpu_mqd_prop *userq_props = queue->userq_prop;
+ struct mes_add_queue_input queue_input;
+ int r;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+
+ queue_input.process_va_start = 0;
+ queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+
+ /* set process quantum to 10 ms and gang quantum to 1 ms as default */
+ queue_input.process_quantum = 100000;
+ queue_input.gang_quantum = 10000;
+ queue_input.paging = false;
+
+ queue_input.process_context_addr = ctx->gpu_addr;
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority);
+
+ queue_input.process_id = queue->vm->pasid;
+ queue_input.queue_type = queue->queue_type;
+ queue_input.mqd_addr = queue->mqd.gpu_addr;
+ queue_input.wptr_addr = userq_props->wptr_gpu_addr;
+ queue_input.queue_size = userq_props->queue_size >> 2;
+ queue_input.doorbell_offset = userq_props->doorbell_index;
+ queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
+ queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
+ return r;
+ }
+
+ DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index);
+ return 0;
+}
+
+static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_remove_queue_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ int r;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+ queue_input.doorbell_offset = queue->doorbell_index;
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r);
+ return r;
+}
+
+static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ struct drm_amdgpu_userq_in *mqd_user)
+{
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ int r, size;
+
+ /*
+ * The FW expects at least one page space allocated for
+ * process ctx and gang ctx each. Create an object
+ * for the same.
+ */
+ size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
+ r = amdgpu_userq_create_object(uq_mgr, ctx, size);
+ if (r) {
+ DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
+ int queue_type)
+{
+ int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
+ struct mes_detect_and_reset_queue_input input;
+ struct amdgpu_usermode_queue *queue;
+ unsigned int hung_db_num = 0;
+ unsigned long queue_id;
+ u32 db_array[8];
+ bool found_hung_queue = false;
+ int r, i;
+
+ if (db_array_size > 8) {
+ dev_err(adev->dev, "DB array size (%d vs 8) too small\n",
+ db_array_size);
+ return -EINVAL;
+ }
+
+ memset(&input, 0x0, sizeof(struct mes_detect_and_reset_queue_input));
+
+ input.queue_type = queue_type;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false,
+ &hung_db_num, db_array);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
+ } else if (hung_db_num) {
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ if (queue->queue_type == queue_type) {
+ for (i = 0; i < hung_db_num; i++) {
+ if (queue->doorbell_index == db_array[i]) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ found_hung_queue = true;
+ atomic_inc(&adev->gpu_reset_counter);
+ amdgpu_userq_fence_driver_force_completion(queue);
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
+ }
+ }
+ }
+ }
+ }
+
+ if (found_hung_queue) {
+ /* Resume scheduling after hang recovery */
+ r = amdgpu_mes_resume(adev);
+ }
+
+ return r;
+}
+
+static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args_in,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
+ struct drm_amdgpu_userq_in *mqd_user = args_in;
+ struct amdgpu_mqd_prop *userq_props;
+ int r;
+
+ /* Structure to initialize MQD for userqueue using generic MQD init function */
+ userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL);
+ if (!userq_props) {
+ DRM_ERROR("Failed to allocate memory for userq_props\n");
+ return -ENOMEM;
+ }
+
+ r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size);
+ if (r) {
+ DRM_ERROR("Failed to create MQD object for userqueue\n");
+ goto free_props;
+ }
+
+ /* Initialize the MQD BO with user given values */
+ userq_props->wptr_gpu_addr = mqd_user->wptr_va;
+ userq_props->rptr_gpu_addr = mqd_user->rptr_va;
+ userq_props->queue_size = mqd_user->queue_size;
+ userq_props->hqd_base_gpu_addr = mqd_user->queue_va;
+ userq_props->mqd_gpu_addr = queue->mqd.gpu_addr;
+ userq_props->use_doorbell = true;
+ userq_props->doorbell_index = queue->doorbell_index;
+ userq_props->fence_address = queue->fence_drv->gpu_addr;
+
+ if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
+ struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
+
+ if (mqd_user->mqd_size != sizeof(*compute_mqd)) {
+ DRM_ERROR("Invalid compute IP MQD size\n");
+ r = -EINVAL;
+ goto free_mqd;
+ }
+
+ compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+ if (IS_ERR(compute_mqd)) {
+ DRM_ERROR("Failed to read user MQD\n");
+ r = -ENOMEM;
+ goto free_mqd;
+ }
+
+ r = amdgpu_userq_input_va_validate(queue, compute_mqd->eop_va,
+ 2048);
+ if (r)
+ goto free_mqd;
+
+ userq_props->eop_gpu_addr = compute_mqd->eop_va;
+ userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
+ userq_props->hqd_active = false;
+ userq_props->tmz_queue =
+ mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+ kfree(compute_mqd);
+ } else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
+ struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
+ struct amdgpu_gfx_shadow_info shadow_info;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
+ } else {
+ r = -EINVAL;
+ goto free_mqd;
+ }
+
+ if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
+ DRM_ERROR("Invalid GFX MQD\n");
+ r = -EINVAL;
+ goto free_mqd;
+ }
+
+ mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+ if (IS_ERR(mqd_gfx_v11)) {
+ DRM_ERROR("Failed to read user MQD\n");
+ r = -ENOMEM;
+ goto free_mqd;
+ }
+
+ userq_props->shadow_addr = mqd_gfx_v11->shadow_va;
+ userq_props->csa_addr = mqd_gfx_v11->csa_va;
+ userq_props->tmz_queue =
+ mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+
+ r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->shadow_va,
+ shadow_info.shadow_size);
+ if (r)
+ goto free_mqd;
+ r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->csa_va,
+ shadow_info.csa_size);
+ if (r)
+ goto free_mqd;
+
+ kfree(mqd_gfx_v11);
+ } else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
+ struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
+
+ if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
+ DRM_ERROR("Invalid SDMA MQD\n");
+ r = -EINVAL;
+ goto free_mqd;
+ }
+
+ mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+ if (IS_ERR(mqd_sdma_v11)) {
+ DRM_ERROR("Failed to read sdma user MQD\n");
+ r = -ENOMEM;
+ goto free_mqd;
+ }
+ r = amdgpu_userq_input_va_validate(queue, mqd_sdma_v11->csa_va,
+ 32);
+ if (r)
+ goto free_mqd;
+
+ userq_props->csa_addr = mqd_sdma_v11->csa_va;
+ kfree(mqd_sdma_v11);
+ }
+
+ queue->userq_prop = userq_props;
+
+ r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props);
+ if (r) {
+ DRM_ERROR("Failed to initialize MQD for userqueue\n");
+ goto free_mqd;
+ }
+
+ /* Create BO for FW operations */
+ r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
+ if (r) {
+ DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
+ goto free_mqd;
+ }
+
+ /* FW expects WPTR BOs to be mapped into GART */
+ r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
+ if (r) {
+ DRM_ERROR("Failed to create WPTR mapping\n");
+ goto free_ctx;
+ }
+
+ return 0;
+
+free_ctx:
+ amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+
+free_mqd:
+ amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+
+free_props:
+ kfree(userq_props);
+
+ return r;
+}
+
+static void
+mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+ kfree(queue->userq_prop);
+ amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+}
+
+static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_suspend_gang_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ signed long timeout = 2100000; /* 2100 ms */
+ u64 fence_gpu_addr;
+ u32 fence_offset;
+ u64 *fence_ptr;
+ int i, r;
+
+ if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+ return 0;
+ r = amdgpu_device_wb_get(adev, &fence_offset);
+ if (r)
+ return r;
+
+ fence_gpu_addr = adev->wb.gpu_addr + (fence_offset * 4);
+ fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
+ *fence_ptr = 0;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.suspend_fence_addr = fence_gpu_addr;
+ queue_input.suspend_fence_value = 1;
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ DRM_ERROR("Failed to suspend gang: %d\n", r);
+ goto out;
+ }
+
+ for (i = 0; i < timeout; i++) {
+ if (*fence_ptr == 1)
+ goto out;
+ udelay(1);
+ }
+ r = -ETIMEDOUT;
+
+out:
+ amdgpu_device_wb_free(adev, fence_offset);
+ return r;
+}
+
+static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_resume_gang_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ int r;
+
+ if (queue->state == AMDGPU_USERQ_STATE_HUNG)
+ return -EINVAL;
+ if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
+ return 0;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "Failed to resume queue, err (%d)\n", r);
+ return r;
+}
+
+const struct amdgpu_userq_funcs userq_mes_funcs = {
+ .mqd_create = mes_userq_mqd_create,
+ .mqd_destroy = mes_userq_mqd_destroy,
+ .unmap = mes_userq_unmap,
+ .map = mes_userq_map,
+ .detect_and_reset = mes_userq_detect_and_reset,
+ .preempt = mes_userq_preempt,
+ .restore = mes_userq_restore,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
new file mode 100644
index 000000000000..090ae8897770
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef MES_USERQ_H
+#define MES_USERQ_H
+#include "amdgpu_userq.h"
+
+extern const struct amdgpu_userq_funcs userq_mes_funcs;
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
deleted file mode 100644
index 7f30629f21a2..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ /dev/null
@@ -1,1007 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/firmware.h>
-#include <linux/module.h>
-#include "amdgpu.h"
-#include "soc15_common.h"
-#include "nv.h"
-#include "gc/gc_10_1_0_offset.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-#include "v10_structs.h"
-#include "mes_api_def.h"
-
-#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820
-#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1
-
-MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
-
-static int mes_v10_1_hw_fini(void *handle);
-
-#define MES_EOP_SIZE 2048
-
-static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs],
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG();
- }
-}
-
-static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
-{
- return ring->adev->wb.wb[ring->rptr_offs];
-}
-
-static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
-{
- u64 wptr;
-
- if (ring->use_doorbell)
- wptr = atomic64_read((atomic64_t *)
- &ring->adev->wb.wb[ring->wptr_offs]);
- else
- BUG();
- return wptr;
-}
-
-static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
- .type = AMDGPU_RING_TYPE_MES,
- .align_mask = 1,
- .nop = 0,
- .support_64bit_ptrs = true,
- .get_rptr = mes_v10_1_ring_get_rptr,
- .get_wptr = mes_v10_1_ring_get_wptr,
- .set_wptr = mes_v10_1_ring_set_wptr,
- .insert_nop = amdgpu_ring_insert_nop,
-};
-
-static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
- void *pkt, int size)
-{
- int ndw = size / 4;
- signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
- struct amdgpu_device *adev = mes->adev;
- struct amdgpu_ring *ring = &mes->ring;
-
- BUG_ON(size % 4 != 0);
-
- if (amdgpu_ring_alloc(ring, ndw))
- return -ENOMEM;
-
- amdgpu_ring_write_multiple(ring, pkt, ndw);
- amdgpu_ring_commit(ring);
-
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
-
- r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
- adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
- return -ETIMEDOUT;
- }
-
- return 0;
-}
-
-static int convert_to_mes_queue_type(int queue_type)
-{
- if (queue_type == AMDGPU_RING_TYPE_GFX)
- return MES_QUEUE_TYPE_GFX;
- else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
- return MES_QUEUE_TYPE_COMPUTE;
- else if (queue_type == AMDGPU_RING_TYPE_SDMA)
- return MES_QUEUE_TYPE_SDMA;
- else
- BUG();
- return -1;
-}
-
-static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
- struct mes_add_queue_input *input)
-{
- struct amdgpu_device *adev = mes->adev;
- union MESAPI__ADD_QUEUE mes_add_queue_pkt;
-
- memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
-
- mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
- mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_add_queue_pkt.process_id = input->process_id;
- mes_add_queue_pkt.page_table_base_addr =
- input->page_table_base_addr - adev->gmc.vram_start;
- mes_add_queue_pkt.process_va_start = input->process_va_start;
- mes_add_queue_pkt.process_va_end = input->process_va_end;
- mes_add_queue_pkt.process_quantum = input->process_quantum;
- mes_add_queue_pkt.process_context_addr = input->process_context_addr;
- mes_add_queue_pkt.gang_quantum = input->gang_quantum;
- mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
- mes_add_queue_pkt.inprocess_gang_priority =
- input->inprocess_gang_priority;
- mes_add_queue_pkt.gang_global_priority_level =
- input->gang_global_priority_level;
- mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_add_queue_pkt.mqd_addr = input->mqd_addr;
- mes_add_queue_pkt.wptr_addr = input->wptr_addr;
- mes_add_queue_pkt.queue_type =
- convert_to_mes_queue_type(input->queue_type);
- mes_add_queue_pkt.paging = input->paging;
-
- mes_add_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_add_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
-}
-
-static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
- struct mes_remove_queue_input *input)
-{
- union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
-
- memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
-
- mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
- mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
-
- mes_remove_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_remove_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
-}
-
-static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
- struct mes_suspend_gang_input *input)
-{
- return 0;
-}
-
-static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
- struct mes_resume_gang_input *input)
-{
- return 0;
-}
-
-static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
-{
- union MESAPI__QUERY_MES_STATUS mes_status_pkt;
-
- memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
-
- mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
- mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_status_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_status_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_status_pkt, sizeof(mes_status_pkt));
-}
-
-static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
-{
- int i;
- struct amdgpu_device *adev = mes->adev;
- union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
-
- memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
-
- mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
- mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
- mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
- mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
- mes_set_hw_res_pkt.paging_vmid = 0;
- mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
- mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
- mes->query_status_fence_gpu_addr;
-
- for (i = 0; i < MAX_COMPUTE_PIPES; i++)
- mes_set_hw_res_pkt.compute_hqd_mask[i] =
- mes->compute_hqd_mask[i];
-
- for (i = 0; i < MAX_GFX_PIPES; i++)
- mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
-
- for (i = 0; i < MAX_SDMA_PIPES; i++)
- mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
-
- for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
- mes_set_hw_res_pkt.agreegated_doorbells[i] =
- mes->agreegated_doorbells[i];
-
- mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_set_hw_res_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
-}
-
-static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
- .add_hw_queue = mes_v10_1_add_hw_queue,
- .remove_hw_queue = mes_v10_1_remove_hw_queue,
- .suspend_gang = mes_v10_1_suspend_gang,
- .resume_gang = mes_v10_1_resume_gang,
-};
-
-static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
-{
- const char *chip_name;
- char fw_name[30];
- int err;
- const struct mes_firmware_header_v1_0 *mes_hdr;
- struct amdgpu_firmware_info *info;
-
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- chip_name = "navi10";
- break;
- case CHIP_SIENNA_CICHLID:
- chip_name = "sienna_cichlid";
- break;
- default:
- BUG();
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name);
- err = request_firmware(&adev->mes.fw, fw_name, adev->dev);
- if (err)
- return err;
-
- err = amdgpu_ucode_validate(adev->mes.fw);
- if (err) {
- release_firmware(adev->mes.fw);
- adev->mes.fw = NULL;
- return err;
- }
-
- mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data;
- adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version);
- adev->mes.ucode_fw_version =
- le32_to_cpu(mes_hdr->mes_ucode_data_version);
- adev->mes.uc_start_addr =
- le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
- ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
- adev->mes.data_start_addr =
- le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
- ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MES;
- info->fw = adev->mes.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
- PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA;
- info->fw = adev->mes.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
- PAGE_SIZE);
- }
-
- return 0;
-}
-
-static void mes_v10_1_free_microcode(struct amdgpu_device *adev)
-{
- release_firmware(adev->mes.fw);
- adev->mes.fw = NULL;
-}
-
-static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev)
-{
- int r;
- const struct mes_firmware_header_v1_0 *mes_hdr;
- const __le32 *fw_data;
- unsigned fw_size;
-
- mes_hdr = (const struct mes_firmware_header_v1_0 *)
- adev->mes.fw->data;
-
- fw_data = (const __le32 *)(adev->mes.fw->data +
- le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
- fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
-
- r = amdgpu_bo_create_reserved(adev, fw_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.ucode_fw_obj,
- &adev->mes.ucode_fw_gpu_addr,
- (void **)&adev->mes.ucode_fw_ptr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
- return r;
- }
-
- memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size);
-
- amdgpu_bo_kunmap(adev->mes.ucode_fw_obj);
- amdgpu_bo_unreserve(adev->mes.ucode_fw_obj);
-
- return 0;
-}
-
-static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev)
-{
- int r;
- const struct mes_firmware_header_v1_0 *mes_hdr;
- const __le32 *fw_data;
- unsigned fw_size;
-
- mes_hdr = (const struct mes_firmware_header_v1_0 *)
- adev->mes.fw->data;
-
- fw_data = (const __le32 *)(adev->mes.fw->data +
- le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
- fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
-
- r = amdgpu_bo_create_reserved(adev, fw_size,
- 64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.data_fw_obj,
- &adev->mes.data_fw_gpu_addr,
- (void **)&adev->mes.data_fw_ptr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
- return r;
- }
-
- memcpy(adev->mes.data_fw_ptr, fw_data, fw_size);
-
- amdgpu_bo_kunmap(adev->mes.data_fw_obj);
- amdgpu_bo_unreserve(adev->mes.data_fw_obj);
-
- return 0;
-}
-
-static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev)
-{
- amdgpu_bo_free_kernel(&adev->mes.data_fw_obj,
- &adev->mes.data_fw_gpu_addr,
- (void **)&adev->mes.data_fw_ptr);
-
- amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj,
- &adev->mes.ucode_fw_gpu_addr,
- (void **)&adev->mes.ucode_fw_ptr);
-}
-
-static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
-{
- uint32_t data = 0;
-
- if (enable) {
- data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
-
- /* set ucode start address */
- WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
- (uint32_t)(adev->mes.uc_start_addr) >> 2);
-
- /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
- data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
- data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
- BYPASS_UNCACHED, 0);
- WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);
-
- /* unhalt MES and activate pipe0 */
- data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
- } else {
- data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
- data = REG_SET_FIELD(data, CP_MES_CNTL,
- MES_INVALIDATE_ICACHE, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
- }
-}
-
-/* This function is for backdoor MES firmware */
-static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
-{
- int r;
- uint32_t data;
-
- if (!adev->mes.fw)
- return -EINVAL;
-
- r = mes_v10_1_allocate_ucode_buffer(adev);
- if (r)
- return r;
-
- r = mes_v10_1_allocate_ucode_data_buffer(adev);
- if (r) {
- mes_v10_1_free_ucode_buffers(adev);
- return r;
- }
-
- mes_v10_1_enable(adev, false);
-
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);
-
- mutex_lock(&adev->srbm_mutex);
- /* me=3, pipe=0, queue=0 */
- nv_grbm_select(adev, 3, 0, 0, 0);
-
- /* set ucode start address */
- WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
- (uint32_t)(adev->mes.uc_start_addr) >> 2);
-
- /* set ucode fimrware address */
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
- lower_32_bits(adev->mes.ucode_fw_gpu_addr));
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
- upper_32_bits(adev->mes.ucode_fw_gpu_addr));
-
- /* set ucode instruction cache boundary to 2M-1 */
- WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);
-
- /* set ucode data firmware address */
- WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
- lower_32_bits(adev->mes.data_fw_gpu_addr));
- WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
- upper_32_bits(adev->mes.data_fw_gpu_addr));
-
- /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
- WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
-
- /* invalidate ICACHE */
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
- break;
- default:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
- break;
- }
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
- break;
- }
-
- /* prime the ICACHE. */
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
- break;
- default:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
- break;
- }
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
- break;
- }
-
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-
- return 0;
-}
-
-static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev)
-{
- int r;
- u32 *eop;
-
- r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.eop_gpu_obj,
- &adev->mes.eop_gpu_addr,
- (void **)&eop);
- if (r) {
- dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
- return r;
- }
-
- memset(eop, 0, adev->mes.eop_gpu_obj->tbo.base.size);
-
- amdgpu_bo_kunmap(adev->mes.eop_gpu_obj);
- amdgpu_bo_unreserve(adev->mes.eop_gpu_obj);
-
- return 0;
-}
-
-static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
- if (r) {
- dev_err(adev->dev,
- "(%d) mes sch_ctx_offs wb alloc failed\n", r);
- return r;
- }
- adev->mes.sch_ctx_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
- adev->mes.sch_ctx_ptr =
- (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
-
- r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
- if (r) {
- dev_err(adev->dev,
- "(%d) query_status_fence_offs wb alloc failed\n", r);
- return r;
- }
- adev->mes.query_status_fence_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
- adev->mes.query_status_fence_ptr =
- (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
-
- return 0;
-}
-
-static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
- uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
- uint32_t tmp;
-
- mqd->header = 0xC0310800;
- mqd->compute_pipelinestat_enable = 0x00000001;
- mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
- mqd->compute_misc_reserved = 0x00000003;
-
- eop_base_addr = ring->eop_gpu_addr >> 8;
- mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
- mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
-
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
- (order_base_2(MES_EOP_SIZE / 4) - 1));
-
- mqd->cp_hqd_eop_control = tmp;
-
- /* enable doorbell? */
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-
- if (ring->use_doorbell) {
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
- else
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 0);
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
- /* disable the queue if it's active */
- ring->wptr = 0;
- mqd->cp_hqd_dequeue_request = 0;
- mqd->cp_hqd_pq_rptr = 0;
- mqd->cp_hqd_pq_wptr_lo = 0;
- mqd->cp_hqd_pq_wptr_hi = 0;
-
- /* set the pointer to the MQD */
- mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
-
- /* set MQD vmid to 0 */
- tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
- mqd->cp_mqd_control = tmp;
-
- /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- hqd_gpu_addr = ring->gpu_addr >> 8;
- mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
- mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-
- /* set up the HQD, this is similar to CP_RB0_CNTL */
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
- (order_base_2(ring->ring_size / 4) - 1));
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
- mqd->cp_hqd_pq_control = tmp;
-
- /* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
- mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
- mqd->cp_hqd_pq_rptr_report_addr_hi =
- upper_32_bits(wb_gpu_addr) & 0xffff;
-
- /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
- mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-
- tmp = 0;
- /* enable the doorbell if requested */
- if (ring->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
-
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
- /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
- mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
-
- /* set the vmid for the queue */
- mqd->cp_hqd_vmid = 0;
-
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
- mqd->cp_hqd_persistent_state = tmp;
-
- /* set MIN_IB_AVAIL_SIZE */
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
- mqd->cp_hqd_ib_control = tmp;
-
- /* activate the queue */
- mqd->cp_hqd_active = 1;
- return 0;
-}
-
-static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
-{
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
- struct amdgpu_device *adev = ring->adev;
- uint32_t data = 0;
-
- mutex_lock(&adev->srbm_mutex);
- nv_grbm_select(adev, 3, 0, 0, 0);
-
- /* set CP_HQD_VMID.VMID = 0. */
- data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
- data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);
-
- /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
- data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
-
- /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
- /* set CP_MQD_CONTROL.VMID=0 */
- data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
- data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
- WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0);
-
- /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
- /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
- mqd->cp_hqd_pq_rptr_report_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
- mqd->cp_hqd_pq_rptr_report_addr_hi);
-
- /* set CP_HQD_PQ_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-
- /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
- mqd->cp_hqd_pq_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
- mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
- /* set CP_HQD_PQ_DOORBELL_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
- mqd->cp_hqd_pq_doorbell_control);
-
- /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
- WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
-
- /* set CP_HQD_ACTIVE.ACTIVE=1 */
- WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
-
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-}
-
-#if 0
-static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- int r;
-
- if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
- return -EINVAL;
-
- r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
-
- r = amdgpu_ring_test_ring(kiq_ring);
- if (r) {
- DRM_ERROR("kfq enable failed\n");
- kiq_ring->sched.ready = false;
- }
- return r;
-}
-#endif
-
-static int mes_v10_1_queue_init(struct amdgpu_device *adev)
-{
- int r;
-
- r = mes_v10_1_mqd_init(&adev->mes.ring);
- if (r)
- return r;
-
-#if 0
- r = mes_v10_1_kiq_enable_queue(adev);
- if (r)
- return r;
-#else
- mes_v10_1_queue_init_register(&adev->mes.ring);
-#endif
-
- return 0;
-}
-
-static int mes_v10_1_ring_init(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring;
-
- ring = &adev->mes.ring;
-
- ring->funcs = &mes_v10_1_ring_funcs;
-
- ring->me = 3;
- ring->pipe = 0;
- ring->queue = 0;
-
- ring->ring_obj = NULL;
- ring->use_doorbell = true;
- ring->doorbell_index = adev->doorbell_index.mes_ring << 1;
- ring->eop_gpu_addr = adev->mes.eop_gpu_addr;
- ring->no_scheduler = true;
- sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-
- return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT);
-}
-
-static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev)
-{
- int r, mqd_size = sizeof(struct v10_compute_mqd);
- struct amdgpu_ring *ring = &adev->mes.ring;
-
- if (ring->mqd_obj)
- return 0;
-
- r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
- &ring->mqd_gpu_addr, &ring->mqd_ptr);
- if (r) {
- dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
- return r;
- }
-
- /* prepare MQD backup */
- adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->mes.mqd_backup)
- dev_warn(adev->dev,
- "no memory to create MQD backup for ring %s\n",
- ring->name);
-
- return 0;
-}
-
-static int mes_v10_1_sw_init(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->mes.adev = adev;
- adev->mes.funcs = &mes_v10_1_funcs;
-
- r = mes_v10_1_init_microcode(adev);
- if (r)
- return r;
-
- r = mes_v10_1_allocate_eop_buf(adev);
- if (r)
- return r;
-
- r = mes_v10_1_mqd_sw_init(adev);
- if (r)
- return r;
-
- r = mes_v10_1_ring_init(adev);
- if (r)
- return r;
-
- r = mes_v10_1_allocate_mem_slots(adev);
- if (r)
- return r;
-
- return 0;
-}
-
-static int mes_v10_1_sw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
-
- kfree(adev->mes.mqd_backup);
-
- amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
- &adev->mes.ring.mqd_gpu_addr,
- &adev->mes.ring.mqd_ptr);
-
- amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj,
- &adev->mes.eop_gpu_addr,
- NULL);
-
- mes_v10_1_free_microcode(adev);
-
- return 0;
-}
-
-static int mes_v10_1_hw_init(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- r = mes_v10_1_load_microcode(adev);
- if (r) {
- DRM_ERROR("failed to MES fw, r=%d\n", r);
- return r;
- }
- }
-
- mes_v10_1_enable(adev, true);
-
- r = mes_v10_1_queue_init(adev);
- if (r)
- goto failure;
-
- r = mes_v10_1_set_hw_resources(&adev->mes);
- if (r)
- goto failure;
-
- r = mes_v10_1_query_sched_status(&adev->mes);
- if (r) {
- DRM_ERROR("MES is busy\n");
- goto failure;
- }
-
- return 0;
-
-failure:
- mes_v10_1_hw_fini(adev);
- return r;
-}
-
-static int mes_v10_1_hw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- mes_v10_1_enable(adev, false);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
- mes_v10_1_free_ucode_buffers(adev);
-
- return 0;
-}
-
-static int mes_v10_1_suspend(void *handle)
-{
- return 0;
-}
-
-static int mes_v10_1_resume(void *handle)
-{
- return 0;
-}
-
-static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
- .name = "mes_v10_1",
- .sw_init = mes_v10_1_sw_init,
- .sw_fini = mes_v10_1_sw_fini,
- .hw_init = mes_v10_1_hw_init,
- .hw_fini = mes_v10_1_hw_fini,
- .suspend = mes_v10_1_suspend,
- .resume = mes_v10_1_resume,
-};
-
-const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
- .type = AMD_IP_BLOCK_TYPE_MES,
- .major = 10,
- .minor = 1,
- .rev = 0,
- .funcs = &mes_v10_1_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
deleted file mode 100644
index 9afd6ddb01e9..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __MES_V10_1_H__
-#define __MES_V10_1_H__
-
-extern const struct amdgpu_ip_block_version mes_v10_1_ip_block;
-
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
new file mode 100644
index 000000000000..3a52754b5cad
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -0,0 +1,1760 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "gfx_v11_0.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "v11_structs.h"
+#include "mes_v11_api_def.h"
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin");
+
+static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block);
+static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
+static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
+
+#define MES_EOP_SIZE 2048
+#define GFX_MES_DRAM_SIZE 0x80000
+#define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE)
+
+#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset, [4:7] = hqd info */
+#define MES11_HUNG_HQD_INFO_OFFSET 4
+
+static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG();
+ }
+}
+
+static u64 mes_v11_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+static u64 mes_v11_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v11_0_ring_get_rptr,
+ .get_wptr = mes_v11_0_ring_get_wptr,
+ .set_wptr = mes_v11_0_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+static const char *mes_v11_0_opcodes[] = {
+ "SET_HW_RSRC",
+ "SET_SCHEDULING_CONFIG",
+ "ADD_QUEUE",
+ "REMOVE_QUEUE",
+ "PERFORM_YIELD",
+ "SET_GANG_PRIORITY_LEVEL",
+ "SUSPEND",
+ "RESUME",
+ "RESET",
+ "SET_LOG_BUFFER",
+ "CHANGE_GANG_PRORITY",
+ "QUERY_SCHEDULER_STATUS",
+ "PROGRAM_GDS",
+ "SET_DEBUG_VMID",
+ "MISC",
+ "UPDATE_ROOT_PAGE_TABLE",
+ "AMD_LOG",
+ "unused",
+ "unused",
+ "SET_HW_RSRC_1",
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+ "WRITE_REG",
+ "INV_GART",
+ "QUERY_STATUS",
+ "READ_REG",
+ "WAIT_REG_MEM",
+ "SET_SHADER_DEBUGGER",
+};
+
+static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
+
+ return op_str;
+}
+
+static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
+ (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes)))
+ op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
+
+ return op_str;
+}
+
+static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+ void *pkt, int size,
+ int api_status_off)
+{
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+ signed long timeout = 2100000; /* 2100 ms */
+ struct amdgpu_device *adev = mes->adev;
+ struct amdgpu_ring *ring = &mes->ring[0];
+ struct MES_API_STATUS *api_status;
+ union MESAPI__MISC *x_pkt = pkt;
+ const char *op_str, *misc_op_str;
+ unsigned long flags;
+ u64 status_gpu_addr;
+ u32 seq, status_offset;
+ u64 *status_ptr;
+ signed long r;
+ int ret;
+
+ if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+ return -EINVAL;
+
+ if (amdgpu_emu_mode) {
+ timeout *= 100;
+ } else if (amdgpu_sriov_vf(adev)) {
+ /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
+ timeout = 15 * 600 * 1000;
+ }
+
+ ret = amdgpu_device_wb_get(adev, &status_offset);
+ if (ret)
+ return ret;
+
+ status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+ status_ptr = (u64 *)&adev->wb.wb[status_offset];
+ *status_ptr = 0;
+
+ spin_lock_irqsave(&mes->ring_lock[0], flags);
+ r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+ if (r)
+ goto error_unlock_free;
+
+ seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ goto error_undo;
+
+ api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+ api_status->api_completion_fence_addr = status_gpu_addr;
+ api_status->api_completion_fence_value = 1;
+
+ amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_status_pkt.api_status.api_completion_fence_addr =
+ ring->fence_drv.gpu_addr;
+ mes_status_pkt.api_status.api_completion_fence_value = seq;
+
+ amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+ sizeof(mes_status_pkt) / 4);
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&mes->ring_lock[0], flags);
+
+ op_str = mes_v11_0_get_op_string(x_pkt);
+ misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
+
+ if (misc_op_str)
+ dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
+ misc_op_str);
+ else if (op_str)
+ dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+ else
+ dev_dbg(adev->dev, "MES msg=%d was emitted\n",
+ x_pkt->header.opcode);
+
+ r = amdgpu_fence_wait_polling(ring, seq, timeout);
+ if (r < 1 || !*status_ptr) {
+
+ if (misc_op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
+ op_str, misc_op_str);
+ else if (op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s\n",
+ op_str);
+ else
+ dev_err(adev->dev, "MES failed to respond to msg=%d\n",
+ x_pkt->header.opcode);
+
+ while (halt_if_hws_hang)
+ schedule();
+
+ r = -ETIMEDOUT;
+ goto error_wb_free;
+ }
+
+ amdgpu_device_wb_free(adev, status_offset);
+ return 0;
+
+error_undo:
+ dev_err(adev->dev, "MES ring buffer is full.\n");
+ amdgpu_ring_undo(ring);
+
+error_unlock_free:
+ spin_unlock_irqrestore(&mes->ring_lock[0], flags);
+
+error_wb_free:
+ amdgpu_device_wb_free(adev, status_offset);
+ return r;
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+ if (queue_type == AMDGPU_RING_TYPE_GFX)
+ return MES_QUEUE_TYPE_GFX;
+ else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
+ return MES_QUEUE_TYPE_COMPUTE;
+ else if (queue_type == AMDGPU_RING_TYPE_SDMA)
+ return MES_QUEUE_TYPE_SDMA;
+ else
+ BUG();
+ return -1;
+}
+
+static int convert_to_mes_priority_level(int priority_level)
+{
+ switch (priority_level) {
+ case AMDGPU_MES_PRIORITY_LEVEL_LOW:
+ return AMD_PRIORITY_LEVEL_LOW;
+ case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
+ default:
+ return AMD_PRIORITY_LEVEL_NORMAL;
+ case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
+ return AMD_PRIORITY_LEVEL_MEDIUM;
+ case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
+ return AMD_PRIORITY_LEVEL_HIGH;
+ case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
+ return AMD_PRIORITY_LEVEL_REALTIME;
+ }
+}
+
+static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
+ struct mes_add_queue_input *input)
+{
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
+
+ memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+ mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+ mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_add_queue_pkt.process_id = input->process_id;
+ mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
+ mes_add_queue_pkt.process_va_start = input->process_va_start;
+ mes_add_queue_pkt.process_va_end = input->process_va_end;
+ mes_add_queue_pkt.process_quantum = input->process_quantum;
+ mes_add_queue_pkt.process_context_addr = input->process_context_addr;
+ mes_add_queue_pkt.gang_quantum = input->gang_quantum;
+ mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
+ mes_add_queue_pkt.inprocess_gang_priority =
+ convert_to_mes_priority_level(input->inprocess_gang_priority);
+ mes_add_queue_pkt.gang_global_priority_level =
+ convert_to_mes_priority_level(input->gang_global_priority_level);
+ mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+
+ if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT) >= 2)
+ mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
+ else
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+
+ mes_add_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_add_queue_pkt.paging = input->paging;
+ mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+ mes_add_queue_pkt.gws_base = input->gws_base;
+ mes_add_queue_pkt.gws_size = input->gws_size;
+ mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
+ mes_add_queue_pkt.tma_addr = input->tma_addr;
+ mes_add_queue_pkt.trap_en = input->trap_en;
+ mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
+ mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+
+ /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+ mes_add_queue_pkt.gds_size = input->queue_size;
+
+ mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
+ struct mes_remove_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+ uint32_t mes_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+
+ if (mes_rev >= 0x60)
+ mes_remove_queue_pkt.remove_queue_after_reset = input->remove_queue_after_reset;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t value, reg;
+ int i, r = 0;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
+ me_id, pipe_id, queue_id, vmid);
+
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ gfx_v11_0_request_gfx_index_mutex(adev, true);
+ /* all se allow writes */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
+ gfx_v11_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_map_legacy_queue_input *input)
+{
+ union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+
+ memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+ mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+ mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_add_queue_pkt.pipe_id = input->pipe_id;
+ mes_add_queue_pkt.queue_id = input->queue_id;
+ mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+ mes_add_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_add_queue_pkt.map_legacy_kq = 1;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = 0;
+
+ mes_remove_queue_pkt.pipe_id = input->pipe_id;
+ mes_remove_queue_pkt.queue_id = input->queue_id;
+
+ if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+ mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
+ mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
+ mes_remove_queue_pkt.tf_data =
+ lower_32_bits(input->trail_fence_data);
+ } else {
+ mes_remove_queue_pkt.unmap_legacy_queue = 1;
+ mes_remove_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ }
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input)
+{
+ union MESAPI__SUSPEND mes_suspend_gang_pkt;
+
+ memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt));
+
+ mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND;
+ mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs;
+ mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr;
+ mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr;
+ mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt),
+ offsetof(union MESAPI__SUSPEND, api_status));
+}
+
+static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input)
+{
+ union MESAPI__RESUME mes_resume_gang_pkt;
+
+ memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt));
+
+ mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME;
+ mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs;
+ mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt),
+ offsetof(union MESAPI__RESUME, api_status));
+}
+
+static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
+{
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_status_pkt, sizeof(mes_status_pkt),
+ offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+
+static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input)
+{
+ union MESAPI__MISC misc_pkt;
+
+ memset(&misc_pkt, 0, sizeof(misc_pkt));
+
+ misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ misc_pkt.header.opcode = MES_SCH_API_MISC;
+ misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ switch (input->op) {
+ case MES_MISC_OP_READ_REG:
+ misc_pkt.opcode = MESAPI_MISC__READ_REG;
+ misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
+ misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
+ break;
+ case MES_MISC_OP_WRITE_REG:
+ misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
+ misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
+ misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
+ break;
+ case MES_MISC_OP_WRM_REG_WAIT:
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = 0;
+ break;
+ case MES_MISC_OP_WRM_REG_WR_WAIT:
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
+ break;
+ case MES_MISC_OP_SET_SHADER_DEBUGGER:
+ misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
+ misc_pkt.set_shader_debugger.process_context_addr =
+ input->set_shader_debugger.process_context_addr;
+ misc_pkt.set_shader_debugger.flags.u32all =
+ input->set_shader_debugger.flags.u32all;
+ misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
+ input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
+ memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
+ input->set_shader_debugger.tcp_watch_cntl,
+ sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
+ misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
+ break;
+ case MES_MISC_OP_CHANGE_CONFIG:
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) {
+ dev_warn_once(mes->adev->dev,
+ "MES FW version must be larger than 0x63 to support limit single process feature.\n");
+ return 0;
+ }
+ misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+ misc_pkt.change_config.opcode =
+ MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+ misc_pkt.change_config.option.bits.limit_single_process =
+ input->change_config.option.limit_single_process;
+ break;
+
+ default:
+ DRM_ERROR("unsupported misc op (%d) \n", input->op);
+ return -EINVAL;
+ }
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &misc_pkt, sizeof(misc_pkt),
+ offsetof(union MESAPI__MISC, api_status));
+}
+
+static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
+{
+ int i;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
+
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+ mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+ mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+ mes_set_hw_res_pkt.paging_vmid = 0;
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0];
+ mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+ mes->query_status_fence_gpu_addr[0];
+
+ for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+ mes_set_hw_res_pkt.compute_hqd_mask[i] =
+ mes->compute_hqd_mask[i];
+
+ for (i = 0; i < MAX_GFX_PIPES; i++)
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] =
+ mes->gfx_hqd_mask[i];
+
+ for (i = 0; i < MAX_SDMA_PIPES; i++)
+ mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
+
+ for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
+ mes->aggregated_doorbells[i];
+
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+ mes_set_hw_res_pkt.enable_reg_active_poll = 1;
+ mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
+ mes_set_hw_res_pkt.oversubscription_timer = 50;
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f)
+ mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
+ else
+ dev_info_once(mes->adev->dev,
+ "MES FW version must be >= 0x7f to enable LR compute workaround.\n");
+
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
+ mes->event_log_gpu_addr;
+ }
+
+ if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ mes_set_hw_res_pkt.limit_single_process = 1;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
+{
+ union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt;
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_set_hw_res_pkt.enable_mes_info_ctx = 1;
+
+ mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0];
+ if (amdgpu_sriov_is_mes_info_enable(mes->adev)) {
+ mes_set_hw_res_pkt.mes_info_ctx_mc_addr =
+ mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE;
+ mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
+ }
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+}
+
+static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input)
+{
+ union MESAPI__RESET mes_reset_queue_pkt;
+
+ if (input->use_mmio)
+ return mes_v11_0_reset_queue_mmio(mes, input->queue_type,
+ input->me_id, input->pipe_id,
+ input->queue_id, input->vmid);
+
+ memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+ mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+ mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_reset_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+
+ if (input->legacy_gfx) {
+ mes_reset_queue_pkt.reset_legacy_gfx = 1;
+ mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
+ mes_reset_queue_pkt.queue_id_lp = input->queue_id;
+ mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
+ mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
+ mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
+ mes_reset_queue_pkt.vmid_id_lp = input->vmid;
+ } else {
+ mes_reset_queue_pkt.reset_queue_only = 1;
+ mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
+ }
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+ offsetof(union MESAPI__RESET, api_status));
+}
+
+static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
+ struct mes_detect_and_reset_queue_input *input)
+{
+ union MESAPI__RESET mes_reset_queue_pkt;
+
+ memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+ mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+ mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_reset_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_reset_queue_pkt.doorbell_offset_addr =
+ mes->hung_queue_db_array_gpu_addr;
+
+ if (input->detect_only)
+ mes_reset_queue_pkt.hang_detect_only = 1;
+ else
+ mes_reset_queue_pkt.hang_detect_then_reset = 1;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+ offsetof(union MESAPI__RESET, api_status));
+}
+
+static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
+ .add_hw_queue = mes_v11_0_add_hw_queue,
+ .remove_hw_queue = mes_v11_0_remove_hw_queue,
+ .map_legacy_queue = mes_v11_0_map_legacy_queue,
+ .unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
+ .suspend_gang = mes_v11_0_suspend_gang,
+ .resume_gang = mes_v11_0_resume_gang,
+ .misc_op = mes_v11_0_misc_op,
+ .reset_hw_queue = mes_v11_0_reset_hw_queue,
+ .detect_and_reset_hung_queues = mes_v11_0_detect_and_reset_hung_queues,
+};
+
+static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
+
+ return 0;
+}
+
+static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ if (fw_size > GFX_MES_DRAM_SIZE) {
+ dev_err(adev->dev, "PIPE%d ucode data fw size (%d) is greater than dram size (%d)\n",
+ pipe, fw_size, GFX_MES_DRAM_SIZE);
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, GFX_MES_DRAM_SIZE,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
+
+ return 0;
+}
+
+static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+}
+
+static void mes_v11_0_get_fw_version(struct amdgpu_device *adev)
+{
+ int pipe;
+
+ /* return early if we have already fetched these */
+ if (adev->mes.sched_version && adev->mes.kiq_version)
+ return;
+
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version =
+ RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version =
+ RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ uint64_t ucode_addr;
+ uint32_t pipe, data = 0;
+
+ if (enable) {
+ if (amdgpu_mes_log_enable) {
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
+ lower_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
+ upper_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
+ dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
+ }
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq &&
+ pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* unhalt MES and activate pipe0 */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
+ adev->enable_mes_kiq ? 1 : 0);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ if (amdgpu_emu_mode)
+ msleep(100);
+ else
+ udelay(500);
+ } else {
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
+ adev->enable_mes_kiq ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+ }
+}
+
+/* This function is for backdoor MES firmware */
+static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe, bool prime_icache)
+{
+ int r;
+ uint32_t data;
+ uint64_t ucode_addr;
+
+ mes_v11_0_enable(adev, false);
+
+ if (!adev->mes.fw[pipe])
+ return -EINVAL;
+
+ r = mes_v11_0_allocate_ucode_buffer(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v11_0_allocate_ucode_data_buffer(adev, pipe);
+ if (r) {
+ mes_v11_0_free_ucode_buffers(adev, pipe);
+ return r;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=0, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);
+
+ /* set ucode start address */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ /* set ucode fimrware address */
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+
+ /* Set 0x7FFFF (512K-1) to CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);
+
+ if (prime_icache) {
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+static int mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ u32 *eop;
+
+ r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ (void **)&eop);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
+ return r;
+ }
+
+ memset(eop, 0,
+ adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
+
+ amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
+
+ return 0;
+}
+
+static int mes_v11_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ memset(mqd, 0, sizeof(*mqd));
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* enable doorbell */
+ tmp = 0;
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ mqd->cp_hqd_vmid = 0;
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
+ PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
+ mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
+ mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+
+ amdgpu_device_flush_hdp(ring->adev, NULL);
+ return 0;
+}
+
+static void mes_v11_0_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /* set CP_MQD_CONTROL.VMID=0 */
+ data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE.ACTIVE=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
+
+ return amdgpu_ring_test_helper(kiq_ring);
+}
+
+static int mes_v11_0_queue_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ if (pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else if (pipe == AMDGPU_MES_SCHED_PIPE)
+ ring = &adev->mes.ring[0];
+ else
+ BUG();
+
+ if ((pipe == AMDGPU_MES_SCHED_PIPE) &&
+ (amdgpu_in_reset(adev) || adev->in_suspend)) {
+ *(ring->wptr_cpu_addr) = 0;
+ *(ring->rptr_cpu_addr) = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ r = mes_v11_0_mqd_init(ring);
+ if (r)
+ return r;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ r = mes_v11_0_kiq_enable_queue(adev);
+ if (r)
+ return r;
+ } else {
+ mes_v11_0_queue_init_register(ring);
+ }
+
+ return 0;
+}
+
+static int mes_v11_0_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ ring = &adev->mes.ring[0];
+
+ ring->funcs = &mes_v11_0_ring_funcs;
+
+ ring->me = 3;
+ ring->pipe = 0;
+ ring->queue = 0;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ spin_lock_init(&adev->gfx.kiq[0].ring_lock);
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ ring->me = 3;
+ ring->pipe = 1;
+ ring->queue = 0;
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_kiq_%d.%d.%d",
+ ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r, mqd_size = sizeof(struct v11_compute_mqd);
+ struct amdgpu_ring *ring;
+
+ if (pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else if (pipe == AMDGPU_MES_SCHED_PIPE)
+ ring = &adev->mes.ring[0];
+ else
+ BUG();
+
+ if (ring->mqd_obj)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ memset(ring->mqd_ptr, 0, mqd_size);
+
+ /* prepare MQD backup */
+ adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->mes.mqd_backup[pipe]) {
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n",
+ ring->name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r, bo_size;
+
+ adev->mes.funcs = &mes_v11_0_funcs;
+ adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
+ adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
+
+ adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
+
+ r = amdgpu_mes_init(adev);
+ if (r)
+ return r;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+
+ r = mes_v11_0_allocate_eop_buf(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v11_0_mqd_sw_init(adev, pipe);
+ if (r)
+ return r;
+ }
+
+ if (adev->enable_mes_kiq) {
+ r = mes_v11_0_kiq_ring_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = mes_v11_0_ring_init(adev);
+ if (r)
+ return r;
+
+ bo_size = AMDGPU_GPU_PAGE_SIZE;
+ if (amdgpu_sriov_is_mes_info_enable(adev))
+ bo_size += MES11_HW_RESOURCE_1_SIZE;
+
+ /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/
+ r = amdgpu_bo_create_kernel(adev,
+ bo_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.resource_1[0],
+ &adev->mes.resource_1_gpu_addr[0],
+ &adev->mes.resource_1_addr[0]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe;
+
+ amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0],
+ &adev->mes.resource_1_addr[0]);
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ kfree(adev->mes.mqd_backup[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ NULL);
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+ }
+
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
+
+ amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj,
+ &adev->mes.ring[0].mqd_gpu_addr,
+ &adev->mes.ring[0].mqd_ptr);
+
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
+ amdgpu_ring_fini(&adev->mes.ring[0]);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
+ mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
+ }
+
+ amdgpu_mes_fini(adev);
+ return 0;
+}
+
+static void mes_v11_0_kiq_dequeue(struct amdgpu_ring *ring)
+{
+ uint32_t data;
+ int i;
+ struct amdgpu_device *adev = ring->adev;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ }
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 1);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
+}
+
+static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* tell RLC which is KIQ dequeue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= ~RLC_CP_SCHEDULERS__scheduler0_MASK;
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+}
+
+static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+ struct amdgpu_ip_block *ip_block;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+
+ r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+
+ r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ return r;
+ }
+
+ }
+
+ mes_v11_0_enable(adev, true);
+
+ mes_v11_0_get_fw_version(adev);
+
+ mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring);
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
+ r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47)
+ adev->mes.enable_legacy_queue_map = true;
+ else
+ adev->mes.enable_legacy_queue_map = false;
+
+ if (adev->mes.enable_legacy_queue_map) {
+ r = mes_v11_0_hw_init(ip_block);
+ if (r)
+ goto failure;
+ }
+
+ return r;
+
+failure:
+ mes_v11_0_hw_fini(ip_block);
+ return r;
+}
+
+static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
+{
+ if (adev->mes.ring[0].sched.ready) {
+ mes_v11_0_kiq_dequeue(&adev->mes.ring[0]);
+ adev->mes.ring[0].sched.ready = false;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ mes_v11_0_kiq_dequeue(&adev->gfx.kiq[0].ring);
+ mes_v11_0_kiq_clear(adev);
+ }
+
+ mes_v11_0_enable(adev, false);
+
+ return 0;
+}
+
+static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->mes.ring[0].sched.ready)
+ goto out;
+
+ if (!adev->enable_mes_kiq) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = mes_v11_0_load_microcode(adev,
+ AMDGPU_MES_SCHED_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to MES fw, r=%d\n", r);
+ return r;
+ }
+ }
+
+ mes_v11_0_enable(adev, true);
+ }
+
+ r = mes_v11_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
+ if (r)
+ goto failure;
+
+ r = mes_v11_0_set_hw_resources(&adev->mes);
+ if (r)
+ goto failure;
+
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) {
+ r = mes_v11_0_set_hw_resources_1(&adev->mes);
+ if (r) {
+ DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
+ goto failure;
+ }
+ }
+
+ r = mes_v11_0_query_sched_status(&adev->mes);
+ if (r) {
+ DRM_ERROR("MES is busy\n");
+ goto failure;
+ }
+
+ r = amdgpu_mes_update_enforce_isolation(adev);
+ if (r)
+ goto failure;
+
+out:
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ adev->mes.ring[0].sched.ready = true;
+
+ return 0;
+
+failure:
+ mes_v11_0_hw_fini(ip_block);
+ return r;
+}
+
+static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+static int mes_v11_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v11_0_hw_fini(ip_block);
+}
+
+static int mes_v11_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v11_0_hw_init(ip_block);
+}
+
+static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r;
+
+ adev->mes.hung_queue_db_array_size = MES11_HUNG_DB_OFFSET_ARRAY_SIZE;
+ adev->mes.hung_queue_hqd_info_offset = MES11_HUNG_HQD_INFO_OFFSET;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+ r = amdgpu_mes_init_microcode(adev, pipe);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
+ .name = "mes_v11_0",
+ .early_init = mes_v11_0_early_init,
+ .late_init = NULL,
+ .sw_init = mes_v11_0_sw_init,
+ .sw_fini = mes_v11_0_sw_fini,
+ .hw_init = mes_v11_0_hw_init,
+ .hw_fini = mes_v11_0_hw_fini,
+ .suspend = mes_v11_0_suspend,
+ .resume = mes_v11_0_resume,
+};
+
+const struct amdgpu_ip_block_version mes_v11_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &mes_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h
new file mode 100644
index 000000000000..b3519e1df2b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V11_0_H__
+#define __MES_V11_0_H__
+
+extern const struct amdgpu_ip_block_version mes_v11_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
new file mode 100644
index 000000000000..744e95d3984a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -0,0 +1,1942 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "gfx_v12_0.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "v12_structs.h"
+#include "mes_v12_api_def.h"
+
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_uni_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin");
+
+static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block);
+static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev);
+static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
+
+#define MES_EOP_SIZE 2048
+
+#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */
+#define MES12_HUNG_HQD_INFO_OFFSET 4
+
+static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG();
+ }
+}
+
+static u64 mes_v12_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+static u64 mes_v12_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static const struct amdgpu_ring_funcs mes_v12_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v12_0_ring_get_rptr,
+ .get_wptr = mes_v12_0_ring_get_wptr,
+ .set_wptr = mes_v12_0_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+static const char *mes_v12_0_opcodes[] = {
+ "SET_HW_RSRC",
+ "SET_SCHEDULING_CONFIG",
+ "ADD_QUEUE",
+ "REMOVE_QUEUE",
+ "PERFORM_YIELD",
+ "SET_GANG_PRIORITY_LEVEL",
+ "SUSPEND",
+ "RESUME",
+ "RESET",
+ "SET_LOG_BUFFER",
+ "CHANGE_GANG_PRORITY",
+ "QUERY_SCHEDULER_STATUS",
+ "unused",
+ "SET_DEBUG_VMID",
+ "MISC",
+ "UPDATE_ROOT_PAGE_TABLE",
+ "AMD_LOG",
+ "SET_SE_MODE",
+ "SET_GANG_SUBMIT",
+ "SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
+};
+
+static const char *mes_v12_0_misc_opcodes[] = {
+ "WRITE_REG",
+ "INV_GART",
+ "QUERY_STATUS",
+ "READ_REG",
+ "WAIT_REG_MEM",
+ "SET_SHADER_DEBUGGER",
+ "NOTIFY_WORK_ON_UNMAPPED_QUEUE",
+ "NOTIFY_TO_UNMAP_PROCESSES",
+};
+
+static const char *mes_v12_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_0_opcodes))
+ op_str = mes_v12_0_opcodes[x_pkt->header.opcode];
+
+ return op_str;
+}
+
+static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
+ (x_pkt->opcode < ARRAY_SIZE(mes_v12_0_misc_opcodes)))
+ op_str = mes_v12_0_misc_opcodes[x_pkt->opcode];
+
+ return op_str;
+}
+
+static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+ int pipe, void *pkt, int size,
+ int api_status_off)
+{
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+ signed long timeout = 2100000; /* 2100 ms */
+ struct amdgpu_device *adev = mes->adev;
+ struct amdgpu_ring *ring = &mes->ring[pipe];
+ spinlock_t *ring_lock = &mes->ring_lock[pipe];
+ struct MES_API_STATUS *api_status;
+ union MESAPI__MISC *x_pkt = pkt;
+ const char *op_str, *misc_op_str;
+ unsigned long flags;
+ u64 status_gpu_addr;
+ u32 seq, status_offset;
+ u64 *status_ptr;
+ signed long r;
+ int ret;
+
+ if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+ return -EINVAL;
+
+ if (amdgpu_emu_mode) {
+ timeout *= 100;
+ } else if (amdgpu_sriov_vf(adev)) {
+ /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
+ timeout = 15 * 600 * 1000;
+ }
+
+ ret = amdgpu_device_wb_get(adev, &status_offset);
+ if (ret)
+ return ret;
+
+ status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+ status_ptr = (u64 *)&adev->wb.wb[status_offset];
+ *status_ptr = 0;
+
+ spin_lock_irqsave(ring_lock, flags);
+ r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+ if (r)
+ goto error_unlock_free;
+
+ seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ goto error_undo;
+
+ api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+ api_status->api_completion_fence_addr = status_gpu_addr;
+ api_status->api_completion_fence_value = 1;
+
+ amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_status_pkt.api_status.api_completion_fence_addr =
+ ring->fence_drv.gpu_addr;
+ mes_status_pkt.api_status.api_completion_fence_value = seq;
+
+ amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+ sizeof(mes_status_pkt) / 4);
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(ring_lock, flags);
+
+ op_str = mes_v12_0_get_op_string(x_pkt);
+ misc_op_str = mes_v12_0_get_misc_op_string(x_pkt);
+
+ if (misc_op_str)
+ dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n",
+ pipe, op_str, misc_op_str);
+ else if (op_str)
+ dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n",
+ pipe, op_str);
+ else
+ dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n",
+ pipe, x_pkt->header.opcode);
+
+ r = amdgpu_fence_wait_polling(ring, seq, timeout);
+
+ /*
+ * status_ptr[31:0] == 0 (fail) or status_ptr[63:0] == 1 (success).
+ * If status_ptr[31:0] == 0 then status_ptr[63:32] will have debug error information.
+ */
+ if (r < 1 || !(lower_32_bits(*status_ptr))) {
+
+ if (misc_op_str)
+ dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
+ pipe, op_str, misc_op_str);
+ else if (op_str)
+ dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n",
+ pipe, op_str);
+ else
+ dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n",
+ pipe, x_pkt->header.opcode);
+
+ while (halt_if_hws_hang)
+ schedule();
+
+ r = -ETIMEDOUT;
+ goto error_wb_free;
+ }
+
+ amdgpu_device_wb_free(adev, status_offset);
+ return 0;
+
+error_undo:
+ dev_err(adev->dev, "MES ring buffer is full.\n");
+ amdgpu_ring_undo(ring);
+
+error_unlock_free:
+ spin_unlock_irqrestore(ring_lock, flags);
+
+error_wb_free:
+ amdgpu_device_wb_free(adev, status_offset);
+ return r;
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+ if (queue_type == AMDGPU_RING_TYPE_GFX)
+ return MES_QUEUE_TYPE_GFX;
+ else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
+ return MES_QUEUE_TYPE_COMPUTE;
+ else if (queue_type == AMDGPU_RING_TYPE_SDMA)
+ return MES_QUEUE_TYPE_SDMA;
+ else if (queue_type == AMDGPU_RING_TYPE_MES)
+ return MES_QUEUE_TYPE_SCHQ;
+ else
+ BUG();
+ return -1;
+}
+
+static int convert_to_mes_priority_level(int priority_level)
+{
+ switch (priority_level) {
+ case AMDGPU_MES_PRIORITY_LEVEL_LOW:
+ return AMD_PRIORITY_LEVEL_LOW;
+ case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
+ default:
+ return AMD_PRIORITY_LEVEL_NORMAL;
+ case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
+ return AMD_PRIORITY_LEVEL_MEDIUM;
+ case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
+ return AMD_PRIORITY_LEVEL_HIGH;
+ case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
+ return AMD_PRIORITY_LEVEL_REALTIME;
+ }
+}
+
+static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
+ struct mes_add_queue_input *input)
+{
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
+
+ memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+ mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+ mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_add_queue_pkt.process_id = input->process_id;
+ mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
+ mes_add_queue_pkt.process_va_start = input->process_va_start;
+ mes_add_queue_pkt.process_va_end = input->process_va_end;
+ mes_add_queue_pkt.process_quantum = input->process_quantum;
+ mes_add_queue_pkt.process_context_addr = input->process_context_addr;
+ mes_add_queue_pkt.gang_quantum = input->gang_quantum;
+ mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
+ mes_add_queue_pkt.inprocess_gang_priority =
+ convert_to_mes_priority_level(input->inprocess_gang_priority);
+ mes_add_queue_pkt.gang_global_priority_level =
+ convert_to_mes_priority_level(input->gang_global_priority_level);
+ mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+
+ mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
+
+ mes_add_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_add_queue_pkt.paging = input->paging;
+ mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+ mes_add_queue_pkt.gws_base = input->gws_base;
+ mes_add_queue_pkt.gws_size = input->gws_size;
+ mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
+ mes_add_queue_pkt.tma_addr = input->tma_addr;
+ mes_add_queue_pkt.trap_en = input->trap_en;
+ mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
+ mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+
+ /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+ mes_add_queue_pkt.gds_size = input->queue_size;
+
+ /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+ mes_add_queue_pkt.gds_size = input->queue_size;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ AMDGPU_MES_SCHED_PIPE,
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
+ struct mes_remove_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+ uint32_t mes_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+
+ if (mes_rev >= 0x5a)
+ mes_remove_queue_pkt.remove_queue_after_reset = input->remove_queue_after_reset;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ AMDGPU_MES_SCHED_PIPE,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
+{
+ u32 i, tmp, val;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Request with MeId=2, PipeId=0 */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ if (val == tmp)
+ break;
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
+
+ /* unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t value, reg;
+ int i, r = 0;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
+ me_id, pipe_id, queue_id, vmid);
+
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ gfx_v12_0_request_gfx_index_mutex(adev, true);
+ /* all se allow writes */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
+ gfx_v12_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_map_legacy_queue_input *input)
+{
+ union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ int pipe;
+
+ memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+ mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+ mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_add_queue_pkt.pipe_id = input->pipe_id;
+ mes_add_queue_pkt.queue_id = input->queue_id;
+ mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+ mes_add_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_add_queue_pkt.map_legacy_kq = 1;
+
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+ int pipe;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = 0;
+
+ mes_remove_queue_pkt.pipe_id = input->pipe_id;
+ mes_remove_queue_pkt.queue_id = input->queue_id;
+
+ if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+ mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
+ mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
+ mes_remove_queue_pkt.tf_data =
+ lower_32_bits(input->trail_fence_data);
+ } else {
+ mes_remove_queue_pkt.unmap_legacy_queue = 1;
+ mes_remove_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ }
+
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input)
+{
+ union MESAPI__SUSPEND mes_suspend_gang_pkt;
+
+ memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt));
+
+ mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND;
+ mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs;
+ mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr;
+ mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr;
+ mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE,
+ &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt),
+ offsetof(union MESAPI__SUSPEND, api_status));
+}
+
+static int mes_v12_0_resume_gang(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input)
+{
+ union MESAPI__RESUME mes_resume_gang_pkt;
+
+ memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt));
+
+ mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME;
+ mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs;
+ mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE,
+ &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt),
+ offsetof(union MESAPI__RESUME, api_status));
+}
+
+static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe)
+{
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_status_pkt, sizeof(mes_status_pkt),
+ offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+
+static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input)
+{
+ union MESAPI__MISC misc_pkt;
+ int pipe;
+
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ memset(&misc_pkt, 0, sizeof(misc_pkt));
+
+ misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ misc_pkt.header.opcode = MES_SCH_API_MISC;
+ misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ switch (input->op) {
+ case MES_MISC_OP_READ_REG:
+ misc_pkt.opcode = MESAPI_MISC__READ_REG;
+ misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
+ misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
+ break;
+ case MES_MISC_OP_WRITE_REG:
+ misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
+ misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
+ misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
+ break;
+ case MES_MISC_OP_WRM_REG_WAIT:
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = 0;
+ break;
+ case MES_MISC_OP_WRM_REG_WR_WAIT:
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
+ break;
+ case MES_MISC_OP_SET_SHADER_DEBUGGER:
+ pipe = AMDGPU_MES_SCHED_PIPE;
+ misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
+ misc_pkt.set_shader_debugger.process_context_addr =
+ input->set_shader_debugger.process_context_addr;
+ misc_pkt.set_shader_debugger.flags.u32all =
+ input->set_shader_debugger.flags.u32all;
+ misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
+ input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
+ memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
+ input->set_shader_debugger.tcp_watch_cntl,
+ sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
+ misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
+ break;
+ case MES_MISC_OP_CHANGE_CONFIG:
+ misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+ misc_pkt.change_config.opcode =
+ MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+ misc_pkt.change_config.option.bits.limit_single_process =
+ input->change_config.option.limit_single_process;
+ break;
+
+ default:
+ DRM_ERROR("unsupported misc op (%d) \n", input->op);
+ return -EINVAL;
+ }
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &misc_pkt, sizeof(misc_pkt),
+ offsetof(union MESAPI__MISC, api_status));
+}
+
+static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
+{
+ union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
+
+ memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt));
+
+ mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+ mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa;
+ mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr =
+ mes->resource_1_gpu_addr[pipe];
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+}
+
+static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
+{
+ int i;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
+
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+ mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+ mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+ mes_set_hw_res_pkt.paging_vmid = 0;
+
+ for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+ mes_set_hw_res_pkt.compute_hqd_mask[i] =
+ mes->compute_hqd_mask[i];
+
+ for (i = 0; i < MAX_GFX_PIPES; i++)
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] =
+ mes->gfx_hqd_mask[i];
+
+ for (i = 0; i < MAX_SDMA_PIPES; i++)
+ mes_set_hw_res_pkt.sdma_hqd_mask[i] =
+ mes->sdma_hqd_mask[i];
+
+ for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
+ mes->aggregated_doorbells[i];
+ }
+
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
+ mes->sch_ctx_gpu_addr[pipe];
+ mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+ mes->query_status_fence_gpu_addr[pipe];
+
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+ mes_set_hw_res_pkt.enable_reg_active_poll = 1;
+ mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82)
+ mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
+ else
+ dev_info_once(adev->dev,
+ "MES FW version must be >= 0x82 to enable LR compute workaround.\n");
+
+ /*
+ * Keep oversubscribe timer for sdma . When we have unmapped doorbell
+ * handling support, other queue will not use the oversubscribe timer.
+ * handling mode - 0: disabled; 1: basic version; 2: basic+ version
+ */
+ mes_set_hw_res_pkt.oversubscription_timer = 50;
+ mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;
+
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr +
+ pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
+ }
+
+ if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ mes_set_hw_res_pkt.limit_single_process = 1;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+static void mes_v12_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
+ data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
+ data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
+ data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
+ data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
+ data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);
+
+ data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
+}
+
+
+static void mes_v12_0_enable_unmapped_doorbell_handling(
+ struct amdgpu_mes *mes, bool enable)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data = RREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL);
+
+ /*
+ * The default PROC_LSB settng is 0xc which means doorbell
+ * addr[16:12] gives the doorbell page number. For kfd, each
+ * process will use 2 pages of doorbell, we need to change the
+ * setting to 0xd
+ */
+ data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
+ data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;
+
+ data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;
+
+ WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data);
+}
+
+static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input)
+{
+ union MESAPI__RESET mes_reset_queue_pkt;
+ int pipe;
+
+ if (input->use_mmio)
+ return mes_v12_0_reset_queue_mmio(mes, input->queue_type,
+ input->me_id, input->pipe_id,
+ input->queue_id, input->vmid);
+
+ memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+ mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+ mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_reset_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+
+ if (input->legacy_gfx) {
+ mes_reset_queue_pkt.reset_legacy_gfx = 1;
+ mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
+ mes_reset_queue_pkt.queue_id_lp = input->queue_id;
+ mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
+ mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
+ mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
+ mes_reset_queue_pkt.vmid_id_lp = input->vmid;
+ } else {
+ mes_reset_queue_pkt.reset_queue_only = 1;
+ mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
+ }
+
+ if (input->is_kq)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+ offsetof(union MESAPI__RESET, api_status));
+}
+
+static int mes_v12_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
+ struct mes_detect_and_reset_queue_input *input)
+{
+ union MESAPI__RESET mes_reset_queue_pkt;
+
+ memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+ mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+ mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_reset_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_reset_queue_pkt.doorbell_offset_addr =
+ mes->hung_queue_db_array_gpu_addr;
+
+ if (input->detect_only)
+ mes_reset_queue_pkt.hang_detect_only = 1;
+ else
+ mes_reset_queue_pkt.hang_detect_then_reset = 1;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE,
+ &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+ offsetof(union MESAPI__RESET, api_status));
+}
+
+static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
+{
+ /*
+ * MES doesn't support invalidate gc_hub on slave xcc individually
+ * master xcc will invalidate all gc_hub for the partition
+ */
+ if (AMDGPU_IS_GFXHUB(id))
+ return 0;
+ else if (AMDGPU_IS_MMHUB0(id))
+ return 1;
+ else
+ return -EINVAL;
+
+}
+
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input)
+{
+ union MESAPI__INV_TLBS mes_inv_tlbs;
+ int ret;
+
+ memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
+
+ mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
+ mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
+ mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
+ mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
+ mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
+
+ /*convert amdgpu_mes_hub_id to mes expected hub_id */
+ ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
+ if (ret < 0)
+ return -EINVAL;
+ mes_inv_tlbs.invalidate_tlbs.hub_id = ret;
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
+ &mes_inv_tlbs, sizeof(mes_inv_tlbs),
+ offsetof(union MESAPI__INV_TLBS, api_status));
+
+}
+
+static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
+ .add_hw_queue = mes_v12_0_add_hw_queue,
+ .remove_hw_queue = mes_v12_0_remove_hw_queue,
+ .map_legacy_queue = mes_v12_0_map_legacy_queue,
+ .unmap_legacy_queue = mes_v12_0_unmap_legacy_queue,
+ .suspend_gang = mes_v12_0_suspend_gang,
+ .resume_gang = mes_v12_0_resume_gang,
+ .misc_op = mes_v12_0_misc_op,
+ .reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
+ .detect_and_reset_hung_queues = mes_v12_0_detect_and_reset_hung_queues,
+};
+
+static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
+
+ return 0;
+}
+
+static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
+
+ return 0;
+}
+
+static void mes_v12_0_free_ucode_buffers(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+}
+
+static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ uint64_t ucode_addr;
+ uint32_t pipe, data = 0;
+
+ if (enable) {
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+ if (amdgpu_mes_log_enable) {
+ u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
+ /* In case uni mes is not enabled, only program for pipe 0 */
+ if (adev->mes.event_log_size >= (pipe + 1) * log_size) {
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
+ lower_32_bits(adev->mes.event_log_gpu_addr +
+ pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
+ upper_32_bits(adev->mes.event_log_gpu_addr +
+ pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
+ dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
+ }
+ }
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ if (pipe == 0)
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ else
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ /* unhalt MES and activate one pipe each loop */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ if (pipe)
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
+ dev_info(adev->dev, "program CP_MES_CNTL : 0x%x\n", data);
+
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (amdgpu_emu_mode)
+ msleep(100);
+ else if (adev->enable_uni_mes)
+ udelay(500);
+ else
+ udelay(50);
+ } else {
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+ }
+}
+
+static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev)
+{
+ uint64_t ucode_addr;
+ int pipe;
+
+ mes_v12_0_enable(adev, false);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ /* me=3, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ /* set ucode start address */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ }
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/* This function is for backdoor MES firmware */
+static int mes_v12_0_load_microcode(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe, bool prime_icache)
+{
+ int r;
+ uint32_t data;
+
+ mes_v12_0_enable(adev, false);
+
+ if (!adev->mes.fw[pipe])
+ return -EINVAL;
+
+ r = mes_v12_0_allocate_ucode_buffer(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v12_0_allocate_ucode_data_buffer(adev, pipe);
+ if (r) {
+ mes_v12_0_free_ucode_buffers(adev, pipe);
+ return r;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=0, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);
+
+ /* set ucode fimrware address */
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+
+ /* Set data cache boundary CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);
+
+ if (prime_icache) {
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+static int mes_v12_0_allocate_eop_buf(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ u32 *eop;
+
+ r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ (void **)&eop);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
+ return r;
+ }
+
+ memset(eop, 0,
+ adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
+
+ amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
+
+ return 0;
+}
+
+static int mes_v12_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* enable doorbell */
+ tmp = 0;
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ mqd->cp_hqd_vmid = 0;
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
+ PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
+ mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
+ mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+
+ /*
+ * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
+ * doorbell handling. This is a reserved CP internal register can
+ * not be accesss by others
+ */
+ mqd->reserved_184 = BIT(15);
+
+ return 0;
+}
+
+static void mes_v12_0_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /* set CP_MQD_CONTROL.VMID=0 */
+ data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE.ACTIVE=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ DRM_ERROR("kfq enable failed\n");
+ kiq_ring->sched.ready = false;
+ }
+ return r;
+}
+
+static int mes_v12_0_queue_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else
+ ring = &adev->mes.ring[pipe];
+
+ if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
+ (amdgpu_in_reset(adev) || adev->in_suspend)) {
+ *(ring->wptr_cpu_addr) = 0;
+ *(ring->rptr_cpu_addr) = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ r = mes_v12_0_mqd_init(ring);
+ if (r)
+ return r;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ if (adev->enable_uni_mes)
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ else
+ r = mes_v12_0_kiq_enable_queue(adev);
+ if (r)
+ return r;
+ } else {
+ mes_v12_0_queue_init_register(ring);
+ }
+
+ if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) ||
+ ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) {
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ return 0;
+}
+
+static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe)
+{
+ struct amdgpu_ring *ring;
+
+ ring = &adev->mes.ring[pipe];
+
+ ring->funcs = &mes_v12_0_ring_funcs;
+
+ ring->me = 3;
+ ring->pipe = pipe;
+ ring->queue = 0;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ spin_lock_init(&adev->gfx.kiq[0].ring_lock);
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ ring->me = 3;
+ ring->pipe = 1;
+ ring->queue = 0;
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_kiq_%d.%d.%d",
+ ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r, mqd_size = sizeof(struct v12_compute_mqd);
+ struct amdgpu_ring *ring;
+
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else
+ ring = &adev->mes.ring[pipe];
+
+ if (ring->mqd_obj)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ memset(ring->mqd_ptr, 0, mqd_size);
+
+ /* prepare MQD backup */
+ adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->mes.mqd_backup[pipe])
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n",
+ ring->name);
+
+ return 0;
+}
+
+static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r;
+
+ adev->mes.funcs = &mes_v12_0_funcs;
+ adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init;
+ adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
+ adev->mes.enable_legacy_queue_map = true;
+
+ adev->mes.event_log_size = adev->enable_uni_mes ?
+ (AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) :
+ (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
+ r = amdgpu_mes_init(adev);
+ if (r)
+ return r;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ r = mes_v12_0_allocate_eop_buf(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v12_0_mqd_sw_init(adev, pipe);
+ if (r)
+ return r;
+
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) {
+ r = mes_v12_0_kiq_ring_init(adev);
+ }
+ else {
+ r = mes_v12_0_ring_init(adev, pipe);
+ if (r)
+ return r;
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.resource_1[pipe],
+ &adev->mes.resource_1_gpu_addr[pipe],
+ &adev->mes.resource_1_addr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe],
+ &adev->mes.resource_1_gpu_addr[pipe],
+ &adev->mes.resource_1_addr[pipe]);
+
+ kfree(adev->mes.mqd_backup[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ NULL);
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+
+ if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
+ amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj,
+ &adev->mes.ring[pipe].mqd_gpu_addr,
+ &adev->mes.ring[pipe].mqd_ptr);
+ amdgpu_ring_fini(&adev->mes.ring[pipe]);
+ }
+ }
+
+ if (!adev->enable_uni_mes) {
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
+ mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
+ }
+
+ amdgpu_mes_fini(adev);
+ return 0;
+}
+
+static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int i;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ }
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 1);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ adev->mes.ring[0].sched.ready = false;
+}
+
+static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
+}
+
+static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+ struct amdgpu_ip_block *ip_block;
+
+ if (adev->enable_uni_mes)
+ mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]);
+ else
+ mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+
+ r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+
+ r = mes_v12_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ return r;
+ }
+
+ mes_v12_0_set_ucode_start_addr(adev);
+
+ } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ mes_v12_0_set_ucode_start_addr(adev);
+
+ mes_v12_0_enable(adev, true);
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
+ r = mes_v12_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ if (adev->enable_uni_mes) {
+ r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE);
+ }
+
+ if (adev->mes.enable_legacy_queue_map) {
+ r = mes_v12_0_hw_init(ip_block);
+ if (r)
+ goto failure;
+ }
+
+ return r;
+
+failure:
+ mes_v12_0_hw_fini(ip_block);
+ return r;
+}
+
+static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
+{
+ if (adev->mes.ring[0].sched.ready) {
+ if (adev->enable_uni_mes)
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->mes.ring[AMDGPU_MES_SCHED_PIPE],
+ RESET_QUEUES, 0, 0);
+ else
+ mes_v12_0_kiq_dequeue_sched(adev);
+
+ adev->mes.ring[0].sched.ready = false;
+ }
+
+ mes_v12_0_enable(adev, false);
+
+ return 0;
+}
+
+static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->mes.ring[0].sched.ready)
+ goto out;
+
+ if (!adev->enable_mes_kiq) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = mes_v12_0_load_microcode(adev,
+ AMDGPU_MES_SCHED_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to MES fw, r=%d\n", r);
+ return r;
+ }
+
+ mes_v12_0_set_ucode_start_addr(adev);
+
+ } else if (adev->firmware.load_type ==
+ AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+
+ mes_v12_0_set_ucode_start_addr(adev);
+ }
+
+ mes_v12_0_enable(adev, true);
+ }
+
+ /* Enable the MES to handle doorbell ring on unmapped queue */
+ mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
+
+ r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
+ if (r)
+ goto failure;
+
+ r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+ if (r)
+ goto failure;
+
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b)
+ mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+
+ mes_v12_0_init_aggregated_doorbell(&adev->mes);
+
+ r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+ if (r) {
+ DRM_ERROR("MES is busy\n");
+ goto failure;
+ }
+
+ r = amdgpu_mes_update_enforce_isolation(adev);
+ if (r)
+ goto failure;
+
+out:
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ adev->mes.ring[0].sched.ready = true;
+
+ return 0;
+
+failure:
+ mes_v12_0_hw_fini(ip_block);
+ return r;
+}
+
+static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+static int mes_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v12_0_hw_fini(ip_block);
+}
+
+static int mes_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v12_0_hw_init(ip_block);
+}
+
+static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r;
+
+ adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+ adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ r = amdgpu_mes_init_microcode(adev, pipe);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs mes_v12_0_ip_funcs = {
+ .name = "mes_v12_0",
+ .early_init = mes_v12_0_early_init,
+ .late_init = NULL,
+ .sw_init = mes_v12_0_sw_init,
+ .sw_fini = mes_v12_0_sw_fini,
+ .hw_init = mes_v12_0_hw_init,
+ .hw_fini = mes_v12_0_hw_fini,
+ .suspend = mes_v12_0_suspend,
+ .resume = mes_v12_0_resume,
+};
+
+const struct amdgpu_ip_block_version mes_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &mes_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h
new file mode 100644
index 000000000000..ac3740f353aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V12_0_H__
+#define __MES_V12_0_H__
+
+extern const struct amdgpu_ip_block_version mes_v12_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index d7b39c07de20..243eabda0607 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -54,7 +54,7 @@ static u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
/*
* Raven2 has a HW issue that it is unable to use the vram which
* is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -114,8 +116,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
return;
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -146,7 +147,6 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
@@ -178,6 +178,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
+ tmp = mmVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
@@ -228,9 +229,55 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
0);
}
+static void mmhub_v1_0_init_saw(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint32_t tmp;
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(pt_base >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(pt_base >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ /* Program SAW CONTEXT0 CNTL */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_CNTL);
+ tmp |= 1 << CONTEXT0_CNTL_ENABLE_OFFSET;
+ tmp &= ~(3 << CONTEXT0_CNTL_PAGE_TABLE_DEPTH_OFFSET);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_CNTL, tmp);
+
+ /* Disable all Contexts except Context0 */
+ tmp = 0xfffe;
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXTS_DISABLE, tmp);
+
+ /* Program SAW CNTL4 */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CNTL4);
+ tmp |= 1 << VMC_TAP_PDE_REQUEST_SNOOP_OFFSET;
+ tmp |= 1 << VMC_TAP_PTE_REQUEST_SNOOP_OFFSET;
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CNTL4, tmp);
+}
+
static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned num_level, block_size;
uint32_t tmp;
int i;
@@ -243,7 +290,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
block_size -= 9;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -282,11 +329,14 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ if (amdgpu_ip_version(adev, ISP_HWIP, 0))
+ mmhub_v1_0_init_saw(adev);
}
static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -303,10 +353,10 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return;
- if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true);
-
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB)
+ amdgpu_dpm_set_powergating_by_smu(adev,
+ AMD_IP_BLOCK_TYPE_GMC,
+ enable, 0);
}
static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
@@ -339,7 +389,7 @@ static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -416,7 +466,7 @@ static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool
static void mmhub_v1_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -548,7 +598,7 @@ static int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1;
@@ -776,10 +826,18 @@ static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev)
}
}
-const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
- .ras_late_init = amdgpu_mmhub_ras_late_init,
+struct amdgpu_ras_block_hw_ops mmhub_v1_0_ras_hw_ops = {
.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
.reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
+};
+
+struct amdgpu_mmhub_ras mmhub_v1_0_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_0_ras_hw_ops,
+ },
+};
+
+const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
.get_fb_location = mmhub_v1_0_get_fb_location,
.init = mmhub_v1_0_init,
.gart_enable = mmhub_v1_0_gart_enable,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
index d77f5b65a618..dae7ca48bd8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
@@ -24,5 +24,6 @@
#define __MMHUB_V1_0_H__
extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_0_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
new file mode 100644
index 000000000000..2adee2b94c37
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -0,0 +1,1371 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "mmhub_v1_7.h"
+
+#include "mmhub/mmhub_1_7_offset.h"
+#include "mmhub/mmhub_1_7_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+
+#define regVM_L2_CNTL3_DEFAULT 0x80100007
+#define regVM_L2_CNTL4_DEFAULT 0x000000c1
+
+static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP);
+
+ base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
+ return base;
+}
+
+static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base));
+}
+
+static void mmhub_v1_7_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v1_7_setup_vm_pt_regs(adev, 0, pt_base);
+
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.fb_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.fb_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ } else {
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+}
+
+static void mmhub_v1_7_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */
+static void mmhub_v1_7_init_snoop_override_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i;
+ uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+
+ for (i = 0; i < 5; i++) { /* DAGB instances */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance, tmp);
+ }
+
+}
+
+static void mmhub_v1_7_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL4, tmp);
+}
+
+static void mmhub_v1_7_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v1_7_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned num_level, block_size;
+ uint32_t tmp;
+ int i;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* On Aldebaran, XNACK can be enabled in the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ 1);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void mmhub_v1_7_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v1_7_init_gart_aperture_regs(adev);
+ mmhub_v1_7_init_system_aperture_regs(adev);
+ mmhub_v1_7_init_tlb_regs(adev);
+ mmhub_v1_7_init_cache_regs(adev);
+ mmhub_v1_7_init_snoop_override_regs(adev);
+
+ mmhub_v1_7_enable_system_domain(adev);
+ mmhub_v1_7_disable_identity_aperture(adev);
+ mmhub_v1_7_setup_vmid_config(adev);
+ mmhub_v1_7_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v1_7_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL3, 0);
+ }
+}
+
+/**
+ * mmhub_v1_7_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v1_7_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static void mmhub_v1_7_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ - regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+}
+
+static void mmhub_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1, def2 = 0, data2 = 0;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG);
+
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
+
+ if (enable) {
+ data |= ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ } else {
+ data &= ~ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG, data);
+
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
+
+ if (def2 != data2)
+ WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
+}
+
+static void mmhub_v1_7_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG);
+
+ if (enable)
+ data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ /* Change state only if MCCG support is enabled through driver */
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+ mmhub_v1_7_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+
+ /* Change state only if LS support is enabled through driver */
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+ mmhub_v1_7_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data, data1;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG);
+
+ data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if ((data & ATC_L2_MISC_CG__ENABLE_MASK) &&
+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+static const struct soc15_ras_field_entry mmhub_v1_7_ras_fields[] = {
+ /* MMHUB Range 0 */
+ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 1 */
+ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHAB Range 2*/
+ { "MMEA2_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHUB Rang 3 */
+ { "MMEA3_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 4 */
+ { "MMEA4_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHUAB Range 5 */
+ { "MMEA5_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+};
+
+static const struct soc15_reg_entry mmhub_v1_7_edc_cnt_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3), 0, 0, 0 },
+};
+
+static int mmhub_v1_7_get_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
+ uint32_t value,
+ uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ras_fields); i++) {
+ if(mmhub_v1_7_ras_fields[i].reg_offset != reg->reg_offset)
+ continue;
+
+ sec_cnt = (value &
+ mmhub_v1_7_ras_fields[i].sec_count_mask) >>
+ mmhub_v1_7_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ dev_info(adev->dev, "MMHUB SubBlock %s, SEC %d\n",
+ mmhub_v1_7_ras_fields[i].name,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ mmhub_v1_7_ras_fields[i].ded_count_mask) >>
+ mmhub_v1_7_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ dev_info(adev->dev, "MMHUB SubBlock %s, DED %d\n",
+ mmhub_v1_7_ras_fields[i].name,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void mmhub_v1_7_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i;
+ uint32_t reg_value;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_edc_cnt_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_edc_cnt_regs[i]));
+ if (reg_value)
+ mmhub_v1_7_get_ras_error_count(adev, &mmhub_v1_7_edc_cnt_regs[i],
+ reg_value, &sec_count, &ded_count);
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+}
+
+static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ /* write 0 to reset the edc counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_edc_cnt_regs); i++)
+ WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_edc_cnt_regs[i]), 0);
+ }
+}
+
+static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_ERR_STATUS), 0, 0, 0 },
+};
+
+static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]));
+ if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n",
+ i, reg_value);
+ }
+ }
+}
+
+static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ mmhub_v1_7_ea_err_status_regs[i]));
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x01);
+ WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
+ reg_value);
+ }
+}
+
+struct amdgpu_ras_block_hw_ops mmhub_v1_7_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_7_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
+ .query_ras_error_status = mmhub_v1_7_query_ras_error_status,
+ .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
+};
+
+struct amdgpu_mmhub_ras mmhub_v1_7_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_7_ras_hw_ops,
+ },
+};
+
+const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
+ .get_fb_location = mmhub_v1_7_get_fb_location,
+ .init = mmhub_v1_7_init,
+ .gart_enable = mmhub_v1_7_gart_enable,
+ .set_fault_enable_default = mmhub_v1_7_set_fault_enable_default,
+ .gart_disable = mmhub_v1_7_gart_disable,
+ .set_clockgating = mmhub_v1_7_set_clockgating,
+ .get_clockgating = mmhub_v1_7_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v1_7_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
new file mode 100644
index 000000000000..629f49052137
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V1_7_H__
+#define __MMHUB_V1_7_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_7_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
new file mode 100644
index 000000000000..cc688ae79e84
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -0,0 +1,871 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "mmhub_v1_8.h"
+
+#include "mmhub/mmhub_1_8_0_offset.h"
+#include "mmhub/mmhub_1_8_0_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+
+#define regVM_L2_CNTL3_DEFAULT 0x80100007
+#define regVM_L2_CNTL4_DEFAULT 0x000000c1
+
+static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP);
+
+ base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
+ return base;
+}
+
+static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub;
+ u32 inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
+}
+
+static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ?
+ adev->gmc.vram_start : adev->gmc.fb_start;
+ uint64_t pt_base;
+ u32 inst_mask;
+ int i;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v1_8_setup_vm_pt_regs(adev, 0, pt_base);
+
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(gart_start >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ } else {
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+ }
+}
+
+static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ uint64_t value;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT,
+ adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
+ adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_BASE,
+ 0x00FFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ 0x3FFFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
+}
+
+static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
+ } else {
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+ }
+}
+
+/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */
+static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i, j;
+ uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ for (j = 0; j < 5; j++) { /* DAGB instances */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance, tmp);
+ }
+ }
+}
+
+static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs setup WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL4, tmp);
+ }
+}
+
+static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp,
+ VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL, tmp);
+ }
+}
+
+static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ u32 inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ }
+}
+
+static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int num_level, block_size;
+ uint32_t tmp, inst_mask;
+ int i, j;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_DEPTH, num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* On 9.4.3, XNACK can be enabled in the SQ
+ * per-process. Retry faults need to be enabled for
+ * that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1);
+ WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+ }
+}
+
+static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 i, j, inst_mask;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+ }
+}
+
+static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v1_8_init_gart_aperture_regs(adev);
+ mmhub_v1_8_init_system_aperture_regs(adev);
+ mmhub_v1_8_init_tlb_regs(adev);
+ mmhub_v1_8_init_cache_regs(adev);
+ mmhub_v1_8_init_snoop_override_regs(adev);
+
+ mmhub_v1_8_enable_system_domain(adev);
+ mmhub_v1_8_disable_identity_aperture(adev);
+ mmhub_v1_8_setup_vmid_config(adev);
+ mmhub_v1_8_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i, inst_mask;
+
+ if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
+ } else {
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+ }
+}
+
+static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp;
+ u32 i, j, inst_mask;
+
+ /* Disable all tables */
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE,
+ 0);
+ WREG32_SOC15(MMHUB, j, regVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0);
+ }
+ }
+
+ mmhub_v1_8_disable_l1_tlb(adev);
+}
+
+/**
+ * mmhub_v1_8_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp, inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+ }
+}
+
+static void mmhub_v1_8_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+
+ hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ -
+ regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
+}
+
+static int mmhub_v1_8_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = {
+ .get_fb_location = mmhub_v1_8_get_fb_location,
+ .init = mmhub_v1_8_init,
+ .gart_enable = mmhub_v1_8_gart_enable,
+ .set_fault_enable_default = mmhub_v1_8_set_fault_enable_default,
+ .gart_disable = mmhub_v1_8_gart_disable,
+ .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs,
+ .set_clockgating = mmhub_v1_8_set_clockgating,
+ .get_clockgating = mmhub_v1_8_get_clockgating,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_CE_ERR_STATUS_LO, regMMEA0_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_CE_ERR_STATUS_LO, regMMEA1_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_CE_ERR_STATUS_LO, regMMEA2_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_CE_ERR_STATUS_LO, regMMEA3_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_CE_ERR_STATUS_LO, regMMEA4_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_CE_ERR_STATUS_LO, regMM_CANE_CE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_UE_ERR_STATUS_LO, regMMEA0_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_UE_ERR_STATUS_LO, regMMEA1_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_UE_ERR_STATUS_LO, regMMEA2_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_UE_ERR_STATUS_LO, regMMEA3_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_UE_ERR_STATUS_LO, regMMEA4_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_UE_ERR_STATUS_LO, regMM_CANE_UE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry mmhub_v1_8_ras_memory_list[] = {
+ {AMDGPU_MMHUB_WGMI_PAGEMEM, "MMEA_WGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_RGMI_PAGEMEM, "MMEA_RGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_WDRAM_PAGEMEM, "MMEA_WDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_RDRAM_PAGEMEM, "MMEA_RDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_WIO_CMDMEM, "MMEA_WIO_CMDMEM"},
+ {AMDGPU_MMHUB_RIO_CMDMEM, "MMEA_RIO_CMDMEM"},
+ {AMDGPU_MMHUB_WGMI_CMDMEM, "MMEA_WGMI_CMDMEM"},
+ {AMDGPU_MMHUB_RGMI_CMDMEM, "MMEA_RGMI_CMDMEM"},
+ {AMDGPU_MMHUB_WDRAM_CMDMEM, "MMEA_WDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_RDRAM_CMDMEM, "MMEA_RDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_MAM_DMEM0, "MMEA_MAM_DMEM0"},
+ {AMDGPU_MMHUB_MAM_DMEM1, "MMEA_MAM_DMEM1"},
+ {AMDGPU_MMHUB_MAM_DMEM2, "MMEA_MAM_DMEM2"},
+ {AMDGPU_MMHUB_MAM_DMEM3, "MMEA_MAM_DMEM3"},
+ {AMDGPU_MMHUB_WRET_TAGMEM, "MMEA_WRET_TAGMEM"},
+ {AMDGPU_MMHUB_RRET_TAGMEM, "MMEA_RRET_TAGMEM"},
+ {AMDGPU_MMHUB_WIO_DATAMEM, "MMEA_WIO_DATAMEM"},
+ {AMDGPU_MMHUB_WGMI_DATAMEM, "MMEA_WGMI_DATAMEM"},
+ {AMDGPU_MMHUB_WDRAM_DATAMEM, "MMEA_WDRAM_DATAMEM"},
+};
+
+static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+ unsigned long ue_count = 0, ce_count = 0;
+
+ /* NOTE: mmhub is converted by aid_mask and the range is 0-3,
+ * which can be used as die ID directly */
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = mmhub_inst,
+ };
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+ &ce_count);
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+
+ amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+}
+
+static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void mmhub_v1_8_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_inst);
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_inst);
+}
+
+static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_reset_ras_error_count(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
+};
+
+static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int mmhub_v1_8_err_codes[] = {
+ 0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */
+ 5, 6, 7, 8, 9, /* CODE_EA0 - 4 */
+ 10, /* CODE_UTCL2_ROUTER */
+ 11, /* CODE_VML2 */
+ 12, /* CODE_VML2_WALKER */
+ 13, /* CODE_MMCANE */
+};
+
+static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ mmhub_v1_8_err_codes,
+ ARRAY_SIZE(mmhub_v1_8_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = {
+ .aca_bank_parser = mmhub_v1_8_aca_bank_parser,
+ .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid,
+};
+
+static const struct aca_info mmhub_v1_8_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &mmhub_v1_8_aca_bank_ops,
+};
+
+static int mmhub_v1_8_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__MMHUB,
+ &mmhub_v1_8_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_8_ras_hw_ops,
+ .ras_late_init = mmhub_v1_8_ras_late_init,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
new file mode 100644
index 000000000000..126f0075ac50
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V1_8_H__
+#define __MMHUB_V1_8_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_8_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index f107385faba2..a0cc8e218ca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -29,10 +29,9 @@
#include "mmhub/mmhub_2_0_0_default.h"
#include "navi10_enum.h"
+#include "gc/gc_10_1_0_offset.h"
#include "soc15_common.h"
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX 0
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0
@@ -93,6 +92,30 @@ static const char *mmhub_client_ids_sienna_cichlid[][2] = {
[15][1] = "OSS",
};
+static const char *mmhub_client_ids_beige_goby[][2] = {
+ [3][0] = "DCEDMC",
+ [4][0] = "DCEVGA",
+ [5][0] = "MP0",
+ [6][0] = "MP1",
+ [8][0] = "VMC",
+ [9][0] = "VCNU0",
+ [11][0] = "VCN0",
+ [14][0] = "HDP",
+ [15][0] = "OSS",
+ [0][1] = "DBGU0",
+ [1][1] = "DBGU1",
+ [2][1] = "DCEDWB",
+ [3][1] = "DCEDMC",
+ [4][1] = "DCEVGA",
+ [5][1] = "MP0",
+ [6][1] = "MP1",
+ [7][1] = "XDP",
+ [9][1] = "VCNU0",
+ [11][1] = "VCN0",
+ [14][1] = "HDP",
+ [15][1] = "OSS",
+};
+
static uint32_t mmhub_v2_0_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
@@ -128,17 +151,18 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
mmhub_cid = mmhub_client_ids_navi1x[cid][rw];
break;
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw];
break;
+ case IP_VERSION(2, 1, 2):
+ mmhub_cid = mmhub_client_ids_beige_goby[cid][rw];
+ break;
default:
mmhub_cid = NULL;
break;
@@ -163,13 +187,13 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
@@ -180,14 +204,14 @@ static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}
@@ -196,12 +220,12 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
uint64_t value;
uint32_t tmp;
- /* Program the AGP BAR */
- WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
- WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
-
if (!amdgpu_sriov_vf(adev)) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
@@ -210,8 +234,7 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
}
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -242,7 +265,6 @@ static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
- tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
@@ -297,7 +319,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
tmp = mmMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
}
static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
@@ -309,7 +331,7 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
}
static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
@@ -340,12 +362,12 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -371,30 +393,32 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!adev->gmc.noretry);
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ hub->vm_cntx_cntl = tmp;
}
static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 0xffffffff);
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
i * hub->eng_addr_distance, 0x1f);
}
}
@@ -417,13 +441,13 @@ static int mmhub_v2_0_gart_enable(struct amdgpu_device *adev)
static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < AMDGPU_NUM_VMID; i++)
- WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL,
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
@@ -496,7 +520,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = {
static void mmhub_v2_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -541,11 +565,13 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
{
uint32_t def, data, def1, data1;
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ return;
+
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
@@ -554,7 +580,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
break;
}
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ if (enable) {
data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
@@ -575,12 +601,10 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
}
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- if (def != data)
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1);
break;
@@ -598,34 +622,27 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
{
uint32_t def, data;
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
- break;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ return;
+
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ /* There is no ATCL2 in MMHUB for 2.1.x */
+ return;
default:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
break;
}
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ if (enable)
data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
- if (def != data) {
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
- break;
- }
- }
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
}
static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
@@ -634,13 +651,12 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
mmhub_v2_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v2_0_update_medium_grain_light_sleep(adev,
@@ -653,18 +669,21 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- int data, data1;
+ u32 data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ /* There is no ATCL2 in MMHUB for 2.1.x. Keep the status
+ * based on DAGB
+ */
+ data = MM_ATC_L2_MISC_CG__ENABLE_MASK;
data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
@@ -689,7 +708,6 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
}
const struct amdgpu_mmhub_funcs mmhub_v2_0_funcs = {
- .ras_late_init = amdgpu_mmhub_ras_late_init,
.init = mmhub_v2_0_init,
.gart_enable = mmhub_v2_0_gart_enable,
.set_fault_enable_default = mmhub_v2_0_set_fault_enable_default,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index ab9be5ad5a5f..5eb8122e2746 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -90,8 +90,10 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (adev->asic_type) {
- case CHIP_VANGOGH:
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 4, 0):
+ case IP_VERSION(2, 4, 1):
mmhub_cid = mmhub_client_ids_vangogh[cid][rw];
break;
default:
@@ -119,7 +121,7 @@ static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
@@ -162,8 +164,7 @@ static void mmhub_v2_3_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -194,7 +195,6 @@ static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev)
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
- tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
@@ -243,7 +243,7 @@ static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev)
tmp = mmMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
}
static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev)
@@ -280,12 +280,12 @@ static void mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -324,12 +324,14 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ hub->vm_cntx_cntl = tmp;
}
static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
- unsigned i;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0,
@@ -371,7 +373,7 @@ static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev)
static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -404,6 +406,7 @@ static void mmhub_v2_3_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
+
tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
@@ -444,7 +447,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = {
static void mmhub_v2_3_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -497,11 +500,11 @@ mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
data &= ~MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK;
data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
} else {
data |= MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK;
@@ -570,14 +573,14 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
return 0;
mmhub_v2_3_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
mmhub_v2_3_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
return 0;
}
-static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1, data2, data3;
@@ -591,13 +594,13 @@ static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags)
/* AMD_CG_SUPPORT_MC_MGCG */
if (!(data & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))
&& !(data1 & MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK)) {
- *flags |= AMD_CG_SUPPORT_MC_MGCG;
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
}
/* AMD_CG_SUPPORT_MC_LS */
@@ -616,7 +619,6 @@ static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags)
}
const struct amdgpu_mmhub_funcs mmhub_v2_3_funcs = {
- .ras_late_init = amdgpu_mmhub_ras_late_init,
.init = mmhub_v2_3_init,
.gart_enable = mmhub_v2_3_gart_enable,
.set_fault_enable_default = mmhub_v2_3_set_fault_enable_default,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
new file mode 100644
index 000000000000..7d5242df58a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -0,0 +1,664 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0.h"
+
+#include "mmhub/mmhub_3_0_0_offset.h"
+#include "mmhub/mmhub_3_0_0_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char *mmhub_client_ids_v3_0_0[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+static uint32_t mmhub_v3_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
+ mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v3_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programed at host.
+ * so skip programing these regs.
+ */
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v3_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v3_0_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_init_gart_aperture_regs(adev);
+ mmhub_v3_0_init_system_aperture_regs(adev);
+ mmhub_v3_0_init_tlb_regs(adev);
+ mmhub_v3_0_init_cache_regs(adev);
+
+ mmhub_v3_0_enable_system_domain(adev);
+ mmhub_v3_0_disable_identity_aperture(adev);
+ mmhub_v3_0_setup_vmid_config(adev);
+ mmhub_v3_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v3_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_get_invalidate_req,
+};
+
+static void mmhub_v3_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vm_contexts_disable =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
+
+ hub->vmhub_funcs = &mmhub_v3_0_vmhub_funcs;
+}
+
+static u64 mmhub_v3_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+static void mmhub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+#if 0
+ uint32_t def1, data1, def2 = 0, data2 = 0;
+#endif
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+#if 0
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
+#endif
+
+ if (enable) {
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#if 0
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+#endif
+ } else {
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#if 0
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+#endif
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#if 0
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
+
+ if (def2 != data2)
+ WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
+#endif
+}
+
+static void mmhub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+ mmhub_v3_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+ mmhub_v3_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static void mmhub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_0_funcs = {
+ .init = mmhub_v3_0_init,
+ .get_fb_location = mmhub_v3_0_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_gart_disable,
+ .set_clockgating = mmhub_v3_0_set_clockgating,
+ .get_clockgating = mmhub_v3_0_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h
new file mode 100644
index 000000000000..3ced20f350bb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_H__
+#define __MMHUB_V3_0_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
new file mode 100644
index 000000000000..910337dc28d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0_1.h"
+
+#include "mmhub/mmhub_3_0_1_offset.h"
+#include "mmhub/mmhub_3_0_1_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char *mmhub_client_ids_v3_0_1[][2] = {
+ [0][0] = "VMC",
+ [1][0] = "ISPXT",
+ [2][0] = "ISPIXT",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPM",
+ [12][0] = "ISPTNR",
+ [14][0] = "ISPCRD0",
+ [15][0] = "ISPCRD1",
+ [16][0] = "ISPCRD2",
+ [22][0] = "HDP",
+ [23][0] = "LSDMA",
+ [24][0] = "JPEG",
+ [27][0] = "VSCH",
+ [28][0] = "VCNU",
+ [29][0] = "VCN",
+ [1][1] = "ISPXT",
+ [2][1] = "ISPIXT",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPM",
+ [10][1] = "ISPMWR0",
+ [11][1] = "ISPMWR1",
+ [12][1] = "ISPTNR",
+ [13][1] = "ISPSWR",
+ [14][1] = "ISPCWR0",
+ [15][1] = "ISPCWR1",
+ [16][1] = "ISPCWR2",
+ [17][1] = "ISPCWR3",
+ [18][1] = "XDP",
+ [21][1] = "OSSSYS",
+ [22][1] = "HDP",
+ [23][1] = "LSDMA",
+ [24][1] = "JPEG",
+ [27][1] = "VSCH",
+ [28][1] = "VCNU",
+ [29][1] = "VCN",
+};
+
+static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v3_0_1_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_0_1_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programed at host.
+ * so skip programing these regs.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_0_1_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_1_init_gart_aperture_regs(adev);
+ mmhub_v3_0_1_init_system_aperture_regs(adev);
+ mmhub_v3_0_1_init_tlb_regs(adev);
+ mmhub_v3_0_1_init_cache_regs(adev);
+
+ mmhub_v3_0_1_enable_system_domain(adev);
+ mmhub_v3_0_1_disable_identity_aperture(adev);
+ mmhub_v3_0_1_setup_vmid_config(adev);
+ mmhub_v3_0_1_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_1_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_0_1_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_1_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_1_get_invalidate_req,
+};
+
+static void mmhub_v3_0_1_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs;
+}
+
+static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ mmhub_v3_0_1_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v3_0_1_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
+ .init = mmhub_v3_0_1_init,
+ .get_fb_location = mmhub_v3_0_1_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_1_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_1_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_1_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_1_gart_disable,
+ .set_clockgating = mmhub_v3_0_1_set_clockgating,
+ .get_clockgating = mmhub_v3_0_1_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_1_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h
new file mode 100644
index 000000000000..4c1246735e7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_1_H__
+#define __MMHUB_V3_0_1_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
new file mode 100644
index 000000000000..f0f182f033b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -0,0 +1,570 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0_2.h"
+
+#include "mmhub/mmhub_3_0_2_offset.h"
+#include "mmhub/mmhub_3_0_2_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char *mmhub_client_ids_v3_0_2[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+static uint32_t mmhub_v3_0_2_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+
+ mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw];
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_0_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v3_0_2_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_0_2_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v3_0_2_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programed at host.
+ * so skip programing these regs.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_0_2_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_0_2_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v3_0_2_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v3_0_2_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_2_init_gart_aperture_regs(adev);
+ mmhub_v3_0_2_init_system_aperture_regs(adev);
+ mmhub_v3_0_2_init_tlb_regs(adev);
+ mmhub_v3_0_2_init_cache_regs(adev);
+
+ mmhub_v3_0_2_enable_system_domain(adev);
+ mmhub_v3_0_2_disable_identity_aperture(adev);
+ mmhub_v3_0_2_setup_vmid_config(adev);
+ mmhub_v3_0_2_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v3_0_2_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_2_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_0_2_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_2_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_2_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_2_get_invalidate_req,
+};
+
+static void mmhub_v3_0_2_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vmhub_funcs = &mmhub_v3_0_2_vmhub_funcs;
+}
+
+static u64 mmhub_v3_0_2_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v3_0_2_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+static void mmhub_v3_0_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ //TODO
+}
+
+static void mmhub_v3_0_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ //TODO
+}
+
+static int mmhub_v3_0_2_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ mmhub_v3_0_2_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v3_0_2_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void mmhub_v3_0_2_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ //TODO
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_0_2_funcs = {
+ .init = mmhub_v3_0_2_init,
+ .get_fb_location = mmhub_v3_0_2_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_2_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_2_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_2_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_2_gart_disable,
+ .set_clockgating = mmhub_v3_0_2_set_clockgating,
+ .get_clockgating = mmhub_v3_0_2_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_2_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h
new file mode 100644
index 000000000000..23ad7b156cdb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_2_H__
+#define __MMHUB_V3_0_2_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
new file mode 100644
index 000000000000..f6fc9778bc30
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
@@ -0,0 +1,746 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_3.h"
+
+#include "mmhub/mmhub_3_3_0_offset.h"
+#include "mmhub/mmhub_3_3_0_sh_mask.h"
+
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+#define regDAGB0_L1TLB_REG_RW_3_3 0x00a4
+#define regDAGB0_L1TLB_REG_RW_3_3_BASE_IDX 1
+#define regDAGB1_L1TLB_REG_RW_3_3 0x0163
+#define regDAGB1_L1TLB_REG_RW_3_3_BASE_IDX 1
+
+static const char *mmhub_client_ids_v3_3[][2] = {
+ [0][0] = "VMC",
+ [1][0] = "ISPXT",
+ [2][0] = "ISPIXT",
+ [4][0] = "DCEDMC",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPM",
+ [9][0] = "ISPPDPRD",
+ [10][0] = "ISPCSTATRD",
+ [11][0] = "ISPBYRPRD",
+ [12][0] = "ISPRGBPRD",
+ [13][0] = "ISPMCFPRD",
+ [14][0] = "ISPMCFPRD1",
+ [15][0] = "ISPYUVPRD",
+ [16][0] = "ISPMCSCRD",
+ [17][0] = "ISPGDCRD",
+ [18][0] = "ISPLMERD",
+ [22][0] = "ISPXT1",
+ [23][0] = "ISPIXT1",
+ [24][0] = "HDP",
+ [25][0] = "LSDMA",
+ [26][0] = "JPEG",
+ [27][0] = "VPE",
+ [28][0] = "VSCH",
+ [29][0] = "VCNU",
+ [30][0] = "VCN",
+ [1][1] = "ISPXT",
+ [2][1] = "ISPIXT",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "ISPCSISWR",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPM",
+ [9][1] = "ISPPDPWR",
+ [10][1] = "ISPCSTATWR",
+ [11][1] = "ISPBYRPWR",
+ [12][1] = "ISPRGBPWR",
+ [13][1] = "ISPMCFPWR",
+ [14][1] = "ISPMWR0",
+ [15][1] = "ISPYUVPWR",
+ [16][1] = "ISPMCSCWR",
+ [17][1] = "ISPGDCWR",
+ [18][1] = "ISPLMEWR",
+ [20][1] = "ISPMWR2",
+ [21][1] = "OSSSYS",
+ [22][1] = "ISPXT1",
+ [23][1] = "ISPIXT1",
+ [24][1] = "HDP",
+ [25][1] = "LSDMA",
+ [26][1] = "JPEG",
+ [27][1] = "VPE",
+ [28][1] = "VSCH",
+ [29][1] = "VCNU",
+ [30][1] = "VCN",
+};
+
+static const char *mmhub_client_ids_v3_3_1[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPM",
+ [24][0] = "HDP",
+ [25][0] = "LSDMA",
+ [26][0] = "JPEG0",
+ [27][0] = "VPE0",
+ [28][0] = "VSCH",
+ [29][0] = "VCNU0",
+ [30][0] = "VCN0",
+ [32+1][0] = "ISPXT",
+ [32+2][0] = "ISPIXT",
+ [32+9][0] = "ISPPDPRD",
+ [32+10][0] = "ISPCSTATRD",
+ [32+11][0] = "ISPBYRPRD",
+ [32+12][0] = "ISPRGBPRD",
+ [32+13][0] = "ISPMCFPRD",
+ [32+14][0] = "ISPMCFPRD1",
+ [32+15][0] = "ISPYUVPRD",
+ [32+16][0] = "ISPMCSCRD",
+ [32+17][0] = "ISPGDCRD",
+ [32+18][0] = "ISPLMERD",
+ [32+22][0] = "ISPXT1",
+ [32+23][0] = "ISPIXT1",
+ [32+26][0] = "JPEG1",
+ [32+27][0] = "VPE1",
+ [32+29][0] = "VCNU1",
+ [32+30][0] = "VCN1",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPM",
+ [21][1] = "OSSSYS",
+ [24][1] = "HDP",
+ [25][1] = "LSDMA",
+ [26][1] = "JPEG0",
+ [27][1] = "VPE0",
+ [28][1] = "VSCH",
+ [29][1] = "VCNU0",
+ [30][1] = "VCN0",
+ [32+1][1] = "ISPXT",
+ [32+2][1] = "ISPIXT",
+ [32+5][1] = "ISPCSISWR",
+ [32+9][1] = "ISPPDPWR",
+ [32+10][1] = "ISPCSTATWR",
+ [32+11][1] = "ISPBYRPWR",
+ [32+12][1] = "ISPRGBPWR",
+ [32+13][1] = "ISPMCFPWR",
+ [32+14][1] = "ISPMWR0",
+ [32+15][1] = "ISPYUVPWR",
+ [32+16][1] = "ISPMCSCWR",
+ [32+17][1] = "ISPGDCWR",
+ [32+18][1] = "ISPLMEWR",
+ [32+19][1] = "ISPMWR1",
+ [32+20][1] = "ISPMWR2",
+ [32+22][1] = "ISPXT1",
+ [32+23][1] = "ISPIXT1",
+ [32+26][1] = "JPEG1",
+ [32+27][1] = "VPE1",
+ [32+29][1] = "VCNU1",
+ [32+30][1] = "VCN1",
+};
+
+static uint32_t mmhub_v3_3_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type ? : 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 2):
+ mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ?
+ mmhub_client_ids_v3_3[cid][rw] :
+ cid == 0x140 ? "UMSCH" : NULL;
+ break;
+ case IP_VERSION(3, 3, 1):
+ mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3_1) ?
+ mmhub_client_ids_v3_3_1[cid][rw] :
+ cid == 0x140 ? "UMSCH" : NULL;
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_3_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+
+}
+
+static void mmhub_v3_3_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_3_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v3_3_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programed at host.
+ * so skip programing these regs.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_3_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_3_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_3_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_3_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_3_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v3_3_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static void mmhub_v3_3_init_saw_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint32_t tmp;
+
+ /* Program page table base, gart start, gart end */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(pt_base >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(pt_base >> 12));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL, tmp);
+
+ /* Disable all contexts except context 0 */
+ tmp = 0xfffe;
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXTS_DISABLE, tmp);
+
+ /* Program saw cntl4 */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PDE_REQUEST_SNOOP, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PTE_REQUEST_SNOOP, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4, tmp);
+}
+
+static void mmhub_v3_3_enable_tls(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0, regDAGB0_L1TLB_REG_RW_3_3, 0);
+ WREG32_SOC15(MMHUB, 0, regDAGB1_L1TLB_REG_RW_3_3, 3);
+}
+
+static int mmhub_v3_3_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_3_init_gart_aperture_regs(adev);
+ mmhub_v3_3_init_system_aperture_regs(adev);
+ mmhub_v3_3_init_tlb_regs(adev);
+ mmhub_v3_3_init_cache_regs(adev);
+
+ mmhub_v3_3_enable_system_domain(adev);
+ mmhub_v3_3_disable_identity_aperture(adev);
+ mmhub_v3_3_setup_vmid_config(adev);
+ mmhub_v3_3_program_invalidation(adev);
+
+ /* standalone alone walker init */
+ mmhub_v3_3_init_saw_regs(adev);
+
+ /* enable mmhub tls */
+ mmhub_v3_3_enable_tls(adev);
+
+ return 0;
+}
+
+static void mmhub_v3_3_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_3_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_3_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_3_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_3_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_3_get_invalidate_req,
+};
+
+static void mmhub_v3_3_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v3_3_vmhub_funcs;
+}
+
+static u64 mmhub_v3_3_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v3_3_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ u64 offset;
+
+ offset = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+ offset &= MMMC_VM_FB_OFFSET__FB_OFFSET_MASK;
+ offset <<= 24;
+
+ return offset;
+}
+
+static void mmhub_v3_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static void mmhub_v3_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v3_3_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ mmhub_v3_3_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v3_3_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void mmhub_v3_3_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_3_funcs = {
+ .init = mmhub_v3_3_init,
+ .get_fb_location = mmhub_v3_3_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_3_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_3_gart_enable,
+ .set_fault_enable_default = mmhub_v3_3_set_fault_enable_default,
+ .gart_disable = mmhub_v3_3_gart_disable,
+ .set_clockgating = mmhub_v3_3_set_clockgating,
+ .get_clockgating = mmhub_v3_3_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_3_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h
new file mode 100644
index 000000000000..37b62c7e5a4a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMHUB_V3_3_H__
+#define __MMHUB_V3_3_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
new file mode 100644
index 000000000000..951998454b25
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -0,0 +1,647 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v4_1_0.h"
+
+#include "mmhub/mmhub_4_1_0_offset.h"
+#include "mmhub/mmhub_4_1_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "soc24_enum.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char *mmhub_client_ids_v4_1_0[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "LSDMA",
+ [17][0] = "JPEG",
+ [19][0] = "VCNU",
+ [22][0] = "VSCH",
+ [23][0] = "HDP",
+ [32+23][0] = "VCNRD",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGUNBIO",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "LSDMA",
+ [17][1] = "JPEG",
+ [18][1] = "VCNWR",
+ [19][1] = "VCNU",
+ [22][1] = "VSCH",
+ [23][1] = "HDP",
+};
+
+static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ /* Only use legacy inv on mmhub side */
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
+ status);
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v4_1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v4_1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v4_1_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v4_1_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programed at host.
+ * so skip programing these regs.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v4_1_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v4_1_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v4_1_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v4_1_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v4_1_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v4_1_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v4_1_0_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v4_1_0_init_gart_aperture_regs(adev);
+ mmhub_v4_1_0_init_system_aperture_regs(adev);
+ mmhub_v4_1_0_init_tlb_regs(adev);
+ mmhub_v4_1_0_init_cache_regs(adev);
+
+ mmhub_v4_1_0_enable_system_domain(adev);
+ mmhub_v4_1_0_disable_identity_aperture(adev);
+ mmhub_v4_1_0_setup_vmid_config(adev);
+ mmhub_v4_1_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v4_1_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v4_1_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void
+mmhub_v4_1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v4_1_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v4_1_0_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v4_1_0_get_invalidate_req,
+};
+
+static void mmhub_v4_1_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS_LO32);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vm_contexts_disable =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
+
+ hub->vmhub_funcs = &mmhub_v4_1_0_vmhub_funcs;
+}
+
+static u64 mmhub_v4_1_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v4_1_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+static void
+mmhub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+#if 0
+ uint32_t def, data;
+#endif
+ uint32_t def1, data1, def2 = 0, data2 = 0;
+#if 0
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+#endif
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
+
+ if (enable) {
+#if 0
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#endif
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+
+ data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+ } else {
+#if 0
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#endif
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+
+ data2 |= (DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+ }
+
+#if 0
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#endif
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
+
+ if (def2 != data2)
+ WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
+}
+
+static void
+mmhub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+#if 0
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#endif
+}
+
+static int mmhub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+ mmhub_v4_1_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+ mmhub_v4_1_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static void mmhub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+#if 0
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+#endif
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs = {
+ .init = mmhub_v4_1_0_init,
+ .get_fb_location = mmhub_v4_1_0_get_fb_location,
+ .get_mc_fb_offset = mmhub_v4_1_0_get_mc_fb_offset,
+ .gart_enable = mmhub_v4_1_0_gart_enable,
+ .set_fault_enable_default = mmhub_v4_1_0_set_fault_enable_default,
+ .gart_disable = mmhub_v4_1_0_gart_disable,
+ .set_clockgating = mmhub_v4_1_0_set_clockgating,
+ .get_clockgating = mmhub_v4_1_0_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v4_1_0_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h
new file mode 100644
index 000000000000..3902d653353c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V4_1_0_H__
+#define __MMHUB_V4_1_0_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 4a31737b6bb0..fe0710b55c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -57,7 +57,7 @@ static u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid,
uint32_t vmid, uint64_t value)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
@@ -108,7 +108,7 @@ static void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi
}
static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
- int hubid)
+ int hubid)
{
uint64_t value;
uint32_t tmp;
@@ -136,8 +136,7 @@ static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15_OFFSET(
MMHUB, 0,
mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
@@ -191,8 +190,6 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
- ECO_BITS, 0);
- tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
ATC_EN, 1);
@@ -201,6 +198,36 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
}
+/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */
+static void mmhub_v9_4_init_snoop_override_regs(struct amdgpu_device *adev, int hubid)
+{
+ uint32_t tmp;
+ int i;
+ uint32_t distance = mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+ uint32_t huboffset = hubid * MMHUB_INSTANCE_REGISTER_OFFSET;
+
+ for (i = 0; i < 5 - (2 * hubid); i++) {
+ /* DAGB instances 0 to 4 are in hub0 and 5 to 7 are in hub1 */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+ huboffset + i * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+ huboffset + i * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+ huboffset + i * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+ huboffset + i * distance, tmp);
+ }
+
+}
+
static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
{
uint32_t tmp;
@@ -297,18 +324,26 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int num_level, block_size;
uint32_t tmp;
int i;
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_DEPTH,
- adev->vm_manager.num_level);
+ num_level);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
@@ -326,7 +361,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
- adev->vm_manager.block_size - 9);
+ block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
@@ -358,7 +393,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev,
int hubid)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -387,6 +422,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
mmhub_v9_4_init_cache_regs(adev, i);
+ mmhub_v9_4_init_snoop_override_regs(adev, i);
mmhub_v9_4_enable_system_domain(adev, i);
if (!amdgpu_sriov_vf(adev))
mmhub_v9_4_disable_identity_aperture(adev, i);
@@ -399,7 +435,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i, j;
@@ -437,7 +473,7 @@ static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
}
/**
- * mmhub_v1_0_set_fault_enable_default - update GART/VM fault handling
+ * mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
@@ -502,8 +538,8 @@ static void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool
static void mmhub_v9_4_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] =
- {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]};
+ struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = {
+ &adev->vmhub[AMDGPU_MMHUB0(0)], &adev->vmhub[AMDGPU_MMHUB1(0)]};
int i;
for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
@@ -650,9 +686,9 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- int data, data1;
+ u32 data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
@@ -1563,7 +1599,7 @@ static int mmhub_v9_4_get_ras_error_count(struct amdgpu_device *adev,
uint32_t sec_cnt, ded_cnt;
for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) {
- if(mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
+ if (mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
continue;
sec_cnt = (value &
@@ -1646,16 +1682,31 @@ static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_err_status_regs); i++) {
reg_value =
RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_err_status_regs[i]));
- if (reg_value)
+ if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ /* SDP read/write error/parity error in FUE_IS_FATAL mode
+ * can cause system fatal error in arcturas. Harvest the error
+ * status before GPU reset */
dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n",
i, reg_value);
+ }
}
}
-const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
- .ras_late_init = amdgpu_mmhub_ras_late_init,
+const struct amdgpu_ras_block_hw_ops mmhub_v9_4_ras_hw_ops = {
.query_ras_error_count = mmhub_v9_4_query_ras_error_count,
.reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
+ .query_ras_error_status = mmhub_v9_4_query_ras_error_status,
+};
+
+struct amdgpu_mmhub_ras mmhub_v9_4_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v9_4_ras_hw_ops,
+ },
+};
+
+const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
.get_fb_location = mmhub_v9_4_get_fb_location,
.init = mmhub_v9_4_init,
.gart_enable = mmhub_v9_4_gart_enable,
@@ -1664,5 +1715,4 @@ const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
.set_clockgating = mmhub_v9_4_set_clockgating,
.get_clockgating = mmhub_v9_4_get_clockgating,
.setup_vm_pt_regs = mmhub_v9_4_setup_vm_pt_regs,
- .query_ras_error_status = mmhub_v9_4_query_ras_error_status,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
index 92404a8f66f3..a48329d95f71 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
@@ -24,5 +24,6 @@
#define __MMHUB_V9_4_H__
extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v9_4_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
index 20958639b601..2cdab8062c86 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
@@ -24,9 +24,7 @@
#ifndef __MMSCH_V1_0_H__
#define __MMSCH_V1_0_H__
-#define MMSCH_VERSION_MAJOR 1
-#define MMSCH_VERSION_MINOR 0
-#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+#define MMSCH_VERSION 0x1
enum mmsch_v1_0_command_type {
MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
index 3e4e858a6965..a773ef61b78c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
@@ -30,6 +30,8 @@
#define MMSCH_VERSION_MINOR 0
#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+#define MMSCH_V3_0_VCN_INSTANCES 0x2
+
enum mmsch_v3_0_command_type {
MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
@@ -47,7 +49,7 @@ struct mmsch_v3_0_table_info {
struct mmsch_v3_0_init_header {
uint32_t version;
uint32_t total_size;
- struct mmsch_v3_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES];
+ struct mmsch_v3_0_table_info inst[MMSCH_V3_0_VCN_INSTANCES];
};
struct mmsch_v3_0_cmd_direct_reg_header {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
new file mode 100644
index 000000000000..ced26cc5123a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_H__
+#define __MMSCH_V4_0_H__
+
+#include "amdgpu_vcn.h"
+
+#define MMSCH_VERSION_MAJOR 4
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+
+#define RB_ENABLED (1 << 0)
+#define RB4_ENABLED (1 << 1)
+
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3
+#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4
+#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5
+
+#define MMSCH_V4_0_VCN_INSTANCES 0x2
+
+enum mmsch_v4_0_command_type {
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf
+};
+
+struct mmsch_v4_0_table_info {
+ uint32_t init_status;
+ uint32_t table_offset;
+ uint32_t table_size;
+};
+
+struct mmsch_v4_0_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v4_0_table_info inst[MMSCH_V4_0_VCN_INSTANCES];
+ struct mmsch_v4_0_table_info jpegdec;
+};
+
+struct mmsch_v4_0_cmd_direct_reg_header {
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v4_0_cmd_indirect_reg_header {
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v4_0_cmd_direct_write {
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v4_0_cmd_direct_read_modify_write {
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data;
+ uint32_t mask_value;
+};
+
+struct mmsch_v4_0_cmd_direct_polling {
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v4_0_cmd_end {
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v4_0_cmd_indirect_write {
+ struct mmsch_v4_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+#define MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_read_modify_write); \
+ size_dw = size / 4; \
+ direct_rd_mod_wt.cmd_header.reg_offset = reg; \
+ direct_rd_mod_wt.mask_value = mask; \
+ direct_rd_mod_wt.write_data = data; \
+ memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V4_0_INSERT_DIRECT_WT(reg, value) { \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_write); \
+ size_dw = size / 4; \
+ direct_wt.cmd_header.reg_offset = reg; \
+ direct_wt.reg_value = value; \
+ memcpy((void *)table_loc, &direct_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V4_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_polling); \
+ size_dw = size / 4; \
+ direct_poll.cmd_header.reg_offset = reg; \
+ direct_poll.mask_value = mask; \
+ direct_poll.wait_value = wait; \
+ memcpy((void *)table_loc, &direct_poll, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V4_0_INSERT_END() { \
+ size = sizeof(struct mmsch_v4_0_cmd_end); \
+ size_dw = size / 4; \
+ memcpy((void *)table_loc, &end, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index 000000000000..db7eb5260295
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v4_0_table_info vcn0;
+ struct mmsch_v4_0_table_info mjpegdec0[4];
+ struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h
new file mode 100644
index 000000000000..6f749814929f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V5_0_H__
+#define __MMSCH_V5_0_H__
+
+#include "amdgpu_vcn.h"
+
+#define MMSCH_VERSION_MAJOR 5
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+
+#define RB_ENABLED (1 << 0)
+#define RB4_ENABLED (1 << 1)
+
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3
+#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4
+#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5
+
+enum mmsch_v5_0_command_type {
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf
+};
+
+struct mmsch_v5_0_table_info {
+ uint32_t init_status;
+ uint32_t table_offset;
+ uint32_t table_size;
+};
+
+struct mmsch_v5_0_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v5_0_table_info vcn0;
+ struct mmsch_v5_0_table_info mjpegdec0[5];
+ struct mmsch_v5_0_table_info mjpegdec1[5];
+};
+
+struct mmsch_v5_0_cmd_direct_reg_header {
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v5_0_cmd_indirect_reg_header {
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v5_0_cmd_direct_write {
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v5_0_cmd_direct_read_modify_write {
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data;
+ uint32_t mask_value;
+};
+
+struct mmsch_v5_0_cmd_direct_polling {
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v5_0_cmd_end {
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v5_0_cmd_indirect_write {
+ struct mmsch_v5_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+#define MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_read_modify_write); \
+ size_dw = size / 4; \
+ direct_rd_mod_wt.cmd_header.reg_offset = reg; \
+ direct_rd_mod_wt.mask_value = mask; \
+ direct_rd_mod_wt.write_data = data; \
+ memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V5_0_INSERT_DIRECT_WT(reg, value) { \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_write); \
+ size_dw = size / 4; \
+ direct_wt.cmd_header.reg_offset = reg; \
+ direct_wt.reg_value = value; \
+ memcpy((void *)table_loc, &direct_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V5_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_polling); \
+ size_dw = size / 4; \
+ direct_poll.cmd_header.reg_offset = reg; \
+ direct_poll.mask_value = mask; \
+ direct_poll.wait_value = wait; \
+ memcpy((void *)table_loc, &direct_poll, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V5_0_INSERT_END() { \
+ size = sizeof(struct mmsch_v5_0_cmd_end); \
+ size_dw = size / 4; \
+ memcpy((void *)table_loc, &end, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 3ee481557fc9..9a40107a0869 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -32,6 +32,8 @@
#include "soc15_common.h"
#include "mxgpu_ai.h"
+#include "amdgpu_reset.h"
+
static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
{
WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
@@ -91,7 +93,7 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
- pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
+ dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
return -ETIME;
}
@@ -109,7 +111,7 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
timeout -= 10;
} while (timeout > 1);
- pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+ dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
return -ETIME;
}
@@ -130,7 +132,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_ai_mailbox_set_valid(adev, false);
trn = xgpu_ai_peek_ack(adev);
if (trn) {
- pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! wait again !\n", trn);
msleep(1);
}
} while(trn);
@@ -153,7 +155,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
/* start to poll ack */
r = xgpu_ai_poll_ack(adev);
if (r)
- pr_err("Doesn't get ack from pf, continue\n");
+ dev_err(adev->dev, "Doesn't get ack from pf, continue\n");
xgpu_ai_mailbox_set_valid(adev, false);
}
@@ -171,7 +173,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
req == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r) {
- pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
+ dev_err(adev->dev, "Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
return r;
}
/* Retrieve checksum from mailbox2 */
@@ -180,6 +182,11 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
}
+ } else if (req == IDH_REQ_GPU_INIT_DATA){
+ /* Dummy REQ_GPU_INIT_DATA handling */
+ r = xgpu_ai_poll_msg(adev, IDH_REQ_GPU_INIT_DATA_READY);
+ /* version set to 0 since dummy */
+ adev->virt.req_init_data_ver = 0;
}
return 0;
@@ -224,7 +231,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_DEBUG("get ack intr and do nothing.\n");
+ dev_dbg(adev->dev, "get ack intr and do nothing.\n");
return 0;
}
@@ -242,39 +249,78 @@ static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+static void xgpu_ai_ready_to_reset(struct amdgpu_device *adev)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
-
- /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
- * otherwise the mailbox msg will be ruined/reseted by
- * the VF FLR.
- */
- if (!down_read_trylock(&adev->reset_sem))
- return;
-
- amdgpu_virt_fini_data_exchange(adev);
- atomic_set(&adev->in_gpu_reset, 1);
+ xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
+static int xgpu_ai_wait_reset(struct amdgpu_device *adev)
+{
+ int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
do {
- if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
- goto flr_done;
-
+ if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) {
+ dev_dbg(adev->dev, "Got AI IDH_FLR_NOTIFICATION_CMPL after %d ms\n", AI_MAILBOX_POLL_FLR_TIMEDOUT - timeout);
+ return 0;
+ }
msleep(10);
timeout -= 10;
} while (timeout > 1);
-flr_done:
- atomic_set(&adev->in_gpu_reset, 0);
- up_read(&adev->reset_sem);
+ dev_dbg(adev->dev, "waiting AI IDH_FLR_NOTIFICATION_CMPL timeout\n");
+ return -ETIME;
+}
+
+static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_reset_context reset_context = { 0 };
+
+ amdgpu_virt_fini_data_exchange(adev);
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
- adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
- amdgpu_device_gpu_recover(adev, NULL);
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+}
+
+static void xgpu_ai_mailbox_req_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_request_bad_pages(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
+
+/**
+ * xgpu_ai_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information
+ * @work: pointer to the work_struct
+ *
+ * This work handler is triggered when bad pages are ready, and it reinitializes
+ * the data exchange region to retrieve updated bad page information from the host.
+ */
+static void xgpu_ai_mailbox_handle_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_init_data_exchange(adev);
+ up_read(&adev->reset_domain->sem);
+ }
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -296,23 +342,47 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
- case IDH_FLR_NOTIFICATION:
+ case IDH_RAS_BAD_PAGES_READY:
+ xgpu_ai_mailbox_send_ack(adev);
if (amdgpu_sriov_runtime(adev))
- schedule_work(&adev->virt.flr_work);
+ schedule_work(&adev->virt.handle_bad_pages_work);
break;
- case IDH_QUERY_ALIVE:
- xgpu_ai_mailbox_send_ack(adev);
- break;
- /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
- * it byfar since that polling thread will handle it,
- * other msg like flr complete is not handled here.
- */
- case IDH_CLR_MSG_BUF:
- case IDH_FLR_NOTIFICATION_CMPL:
- case IDH_READY_TO_ACCESS_GPU:
- default:
+ case IDH_RAS_BAD_PAGES_NOTIFICATION:
+ xgpu_ai_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.req_bad_pages_work);
+ break;
+ case IDH_UNRECOV_ERR_NOTIFICATION:
+ xgpu_ai_mailbox_send_ack(adev);
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
+ case IDH_QUERY_ALIVE:
+ xgpu_ai_mailbox_send_ack(adev);
+ break;
+ /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+ * it byfar since that polling thread will handle it,
+ * other msg like flr complete is not handled here.
+ */
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
break;
}
@@ -368,6 +438,8 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
}
INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
+ INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_ai_mailbox_req_bad_pages_work);
+ INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_ai_mailbox_handle_bad_pages_work);
return 0;
}
@@ -378,10 +450,32 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
+static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
+{
+ return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+}
+
+static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ xgpu_ai_send_access_requests(adev, IDH_RAS_POISON);
+}
+
+static bool xgpu_ai_rcvd_ras_intr(struct amdgpu_device *adev)
+{
+ enum idh_event msg = xgpu_ai_mailbox_peek_msg(adev);
+
+ return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF);
+}
+
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
.reset_gpu = xgpu_ai_request_reset,
- .wait_reset = NULL,
+ .ready_to_reset = xgpu_ai_ready_to_reset,
+ .wait_reset = xgpu_ai_wait_reset,
.trans_msg = xgpu_ai_mailbox_trans_msg,
+ .req_init_data = xgpu_ai_request_init_data,
+ .ras_poison_handler = xgpu_ai_ras_poison_handler,
+ .rcvd_ras_intr = xgpu_ai_rcvd_ras_intr,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 50572635d0f8..874b9f8f9804 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -26,7 +26,7 @@
#define AI_MAILBOX_POLL_ACK_TIMEDOUT 500
#define AI_MAILBOX_POLL_MSG_TIMEDOUT 6000
-#define AI_MAILBOX_POLL_FLR_TIMEDOUT 5000
+#define AI_MAILBOX_POLL_FLR_TIMEDOUT 10000
#define AI_MAILBOX_POLL_MSG_REP_MAX 11
enum idh_request {
@@ -35,8 +35,12 @@ enum idh_request {
IDH_REQ_GPU_FINI_ACCESS,
IDH_REL_GPU_FINI_ACCESS,
IDH_REQ_GPU_RESET_ACCESS,
+ IDH_REQ_GPU_INIT_DATA,
IDH_LOG_VF_ERROR = 200,
+ IDH_READY_TO_RESET = 201,
+ IDH_RAS_POISON = 202,
+ IDH_REQ_RAS_BAD_PAGES = 205,
};
enum idh_event {
@@ -47,7 +51,13 @@ enum idh_event {
IDH_SUCCESS,
IDH_FAIL,
IDH_QUERY_ALIVE,
-
+ IDH_REQ_GPU_INIT_DATA_READY,
+ IDH_RAS_POISON_READY,
+ IDH_PF_SOFT_FLR_NOTIFICATION,
+ IDH_RAS_ERROR_DETECTED,
+ IDH_RAS_BAD_PAGES_READY = 15,
+ IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
+ IDH_UNRECOV_ERR_NOTIFICATION = 17,
IDH_TEXT_MESSAGE = 255,
};
@@ -58,7 +68,9 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev);
int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev);
void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev);
-#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4
-#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1
+#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE \
+ (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4)
+#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE \
+ (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 48e588d3c409..e7cd07383d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -31,6 +31,8 @@
#include "soc15_common.h"
#include "mxgpu_nv.h"
+#include "amdgpu_reset.h"
+
static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
{
WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
@@ -59,15 +61,20 @@ static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
enum idh_event event)
{
+ int r = 0;
u32 reg;
reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
- if (reg != event)
+ if (reg == IDH_FAIL)
+ r = -EINVAL;
+ if (reg == IDH_UNRECOV_ERR_NOTIFICATION)
+ r = -ENODEV;
+ else if (reg != event)
return -ENOENT;
xgpu_nv_mailbox_send_ack(adev);
- return 0;
+ return r;
}
static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
@@ -89,24 +96,40 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
- pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
+ dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec \n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
return -ETIME;
}
static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
{
- int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT;
+ int r;
+ uint64_t timeout, now;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ now = (uint64_t)ktime_to_ms(ktime_get());
+ timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT;
do {
r = xgpu_nv_mailbox_rcv_msg(adev, event);
- if (!r)
+ if (!r) {
+ dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n",
+ event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now);
return 0;
+ } else if (r == -ENODEV) {
+ if (!amdgpu_ras_is_rma(adev)) {
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. "
+ "Runtime Services are halted.\n");
+ }
+ return r;
+ }
msleep(10);
- timeout -= 10;
- } while (timeout > 1);
+ now = (uint64_t)ktime_to_ms(ktime_get());
+ } while (timeout > now);
+ dev_dbg(adev->dev, "nv_poll_msg timed out\n");
return -ETIME;
}
@@ -127,11 +150,12 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_nv_mailbox_set_valid(adev, false);
trn = xgpu_nv_peek_ack(adev);
if (trn) {
- pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! wait again !\n", trn);
msleep(1);
}
} while (trn);
+ dev_dbg(adev->dev, "trans_msg req = 0x%x, data1 = 0x%x\n", req, data1);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, req);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW1, data1);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW2, data2);
@@ -141,18 +165,27 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
/* start to poll ack */
r = xgpu_nv_poll_ack(adev);
if (r)
- pr_err("Doesn't get ack from pf, continue\n");
+ dev_err(adev->dev, "Doesn't get ack from pf, continue\n");
xgpu_nv_mailbox_set_valid(adev, false);
}
-static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
- enum idh_request req)
+static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
+ enum idh_request req, u32 data1, u32 data2, u32 data3)
{
- int r;
+ struct amdgpu_virt *virt = &adev->virt;
+ int r = 0, retry = 1;
enum idh_event event = -1;
- xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
+ mutex_lock(&virt->access_req_mutex);
+send_request:
+
+ if (amdgpu_ras_is_rma(adev)) {
+ r = -ENODEV;
+ goto out;
+ }
+
+ xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3);
switch (req) {
case IDH_REQ_GPU_INIT_ACCESS:
@@ -163,6 +196,19 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
case IDH_REQ_GPU_INIT_DATA:
event = IDH_REQ_GPU_INIT_DATA_READY;
break;
+ case IDH_RAS_POISON:
+ if (data1 != 0)
+ event = IDH_RAS_POISON_READY;
+ break;
+ case IDH_REQ_RAS_ERROR_COUNT:
+ event = IDH_RAS_ERROR_COUNT_READY;
+ break;
+ case IDH_REQ_RAS_CPER_DUMP:
+ event = IDH_RAS_CPER_DUMP_READY;
+ break;
+ case IDH_REQ_RAS_CHK_CRITI:
+ event = IDH_REQ_RAS_CHK_CRITI_READY;
+ break;
default:
break;
}
@@ -170,21 +216,30 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
if (event != -1) {
r = xgpu_nv_poll_msg(adev, event);
if (r) {
+ if (retry++ < 5)
+ goto send_request;
+
if (req != IDH_REQ_GPU_INIT_DATA) {
- pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
- return r;
- }
- else /* host doesn't support REQ_GPU_INIT_DATA handshake */
+ dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
+ goto out;
+ } else /* host doesn't support REQ_GPU_INIT_DATA handshake */
adev->virt.req_init_data_ver = 0;
} else {
- if (req == IDH_REQ_GPU_INIT_DATA)
- {
- adev->virt.req_init_data_ver =
- RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
-
- /* assume V1 in case host doesn't set version number */
- if (adev->virt.req_init_data_ver < 1)
- adev->virt.req_init_data_ver = 1;
+ if (req == IDH_REQ_GPU_INIT_DATA) {
+ switch (RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) {
+ case GPU_CRIT_REGION_V2:
+ adev->virt.req_init_data_ver = GPU_CRIT_REGION_V2;
+ adev->virt.init_data_header.offset =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
+ adev->virt.init_data_header.size_kb =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3);
+ break;
+ default:
+ adev->virt.req_init_data_ver = GPU_CRIT_REGION_V1;
+ adev->virt.init_data_header.offset = -1;
+ adev->virt.init_data_header.size_kb = 0;
+ break;
+ }
}
}
@@ -195,7 +250,17 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
}
}
- return 0;
+out:
+ mutex_unlock(&virt->access_req_mutex);
+
+ return r;
+}
+
+static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
+ enum idh_request req)
+{
+ return xgpu_nv_send_access_requests_with_param(adev,
+ req, 0, 0, 0);
}
static int xgpu_nv_request_reset(struct amdgpu_device *adev)
@@ -235,14 +300,15 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
{
- return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+ return xgpu_nv_send_access_requests_with_param(adev, IDH_REQ_GPU_INIT_DATA,
+ 0, GPU_CRIT_REGION_V2, 0);
}
static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_DEBUG("get ack intr and do nothing.\n");
+ dev_dbg(adev->dev, "get ack intr and do nothing.\n");
return 0;
}
@@ -263,33 +329,34 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+static void xgpu_nv_ready_to_reset(struct amdgpu_device *adev)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
-
- /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
- * otherwise the mailbox msg will be ruined/reseted by
- * the VF FLR.
- */
- if (!down_read_trylock(&adev->reset_sem))
- return;
-
- amdgpu_virt_fini_data_exchange(adev);
- atomic_set(&adev->in_gpu_reset, 1);
+ xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
+static int xgpu_nv_wait_reset(struct amdgpu_device *adev)
+{
+ int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
do {
- if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
- goto flr_done;
-
+ if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) {
+ dev_dbg(adev->dev, "Got NV IDH_FLR_NOTIFICATION_CMPL after %d ms\n", NV_MAILBOX_POLL_FLR_TIMEDOUT - timeout);
+ return 0;
+ }
msleep(10);
timeout -= 10;
} while (timeout > 1);
-flr_done:
- atomic_set(&adev->in_gpu_reset, 0);
- up_read(&adev->reset_sem);
+ dev_dbg(adev->dev, "waiting NV IDH_FLR_NOTIFICATION_CMPL timeout\n");
+ return -ETIME;
+}
+
+static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_reset_context reset_context = { 0 };
+
+ amdgpu_virt_fini_data_exchange(adev);
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
@@ -297,8 +364,46 @@ flr_done:
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
- adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
- amdgpu_device_gpu_recover(adev, NULL);
+ adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+}
+
+static void xgpu_nv_mailbox_req_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_request_bad_pages(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
+
+/**
+ * xgpu_nv_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information
+ * @work: pointer to the work_struct
+ *
+ * This work handler is triggered when bad pages are ready, and it reinitializes
+ * the data exchange region to retrieve updated bad page information from the host.
+ */
+static void xgpu_nv_mailbox_handle_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_init_data_exchange(adev);
+ up_read(&adev->reset_domain->sem);
+ }
}
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -323,11 +428,38 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
+ case IDH_RAS_BAD_PAGES_READY:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.handle_bad_pages_work);
+ break;
+ case IDH_RAS_BAD_PAGES_NOTIFICATION:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.req_bad_pages_work);
+ break;
+ case IDH_UNRECOV_ERR_NOTIFICATION:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (!amdgpu_ras_is_rma(adev)) {
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
+ }
+
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
case IDH_FLR_NOTIFICATION:
if (amdgpu_sriov_runtime(adev))
- schedule_work(&adev->virt.flr_work);
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
break;
/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
* it byfar since that polling thread will handle it,
@@ -392,6 +524,8 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
}
INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
+ INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_nv_mailbox_req_bad_pages_work);
+ INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_nv_mailbox_handle_bad_pages_work);
return 0;
}
@@ -402,11 +536,67 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
+static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
+ xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
+ } else {
+ amdgpu_virt_fini_data_exchange(adev);
+ xgpu_nv_send_access_requests_with_param(adev,
+ IDH_RAS_POISON, block, 0, 0);
+ }
+}
+
+static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev)
+{
+ enum idh_event msg = xgpu_nv_mailbox_peek_msg(adev);
+
+ return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF);
+}
+
+static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT);
+}
+
+static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr)
+{
+ uint32_t vf_rptr_hi, vf_rptr_lo;
+
+ vf_rptr_hi = (uint32_t)(vf_rptr >> 32);
+ vf_rptr_lo = (uint32_t)(vf_rptr & 0xFFFFFFFF);
+ return xgpu_nv_send_access_requests_with_param(
+ adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0);
+}
+
+static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES);
+}
+
+static int xgpu_nv_check_vf_critical_region(struct amdgpu_device *adev, u64 addr)
+{
+ uint32_t addr_hi, addr_lo;
+
+ addr_hi = (uint32_t)(addr >> 32);
+ addr_lo = (uint32_t)(addr & 0xFFFFFFFF);
+ return xgpu_nv_send_access_requests_with_param(
+ adev, IDH_REQ_RAS_CHK_CRITI, addr_hi, addr_lo, 0);
+}
+
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_full_gpu = xgpu_nv_request_full_gpu_access,
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
.req_init_data = xgpu_nv_request_init_data,
.reset_gpu = xgpu_nv_request_reset,
- .wait_reset = NULL,
+ .ready_to_reset = xgpu_nv_ready_to_reset,
+ .wait_reset = xgpu_nv_wait_reset,
.trans_msg = xgpu_nv_mailbox_trans_msg,
+ .ras_poison_handler = xgpu_nv_ras_poison_handler,
+ .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,
+ .req_ras_err_count = xgpu_nv_req_ras_err_count,
+ .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump,
+ .req_bad_pages = xgpu_nv_req_ras_bad_pages,
+ .req_ras_chk_criti = xgpu_nv_check_vf_critical_region
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 9f5808616174..c1083e5e41e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -25,8 +25,8 @@
#define __MXGPU_NV_H__
#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500
-#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000
-#define NV_MAILBOX_POLL_FLR_TIMEDOUT 5000
+#define NV_MAILBOX_POLL_MSG_TIMEDOUT 15000
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT 10000
#define NV_MAILBOX_POLL_MSG_REP_MAX 11
enum idh_request {
@@ -37,7 +37,13 @@ enum idh_request {
IDH_REQ_GPU_RESET_ACCESS,
IDH_REQ_GPU_INIT_DATA,
- IDH_LOG_VF_ERROR = 200,
+ IDH_LOG_VF_ERROR = 200,
+ IDH_READY_TO_RESET = 201,
+ IDH_RAS_POISON = 202,
+ IDH_REQ_RAS_ERROR_COUNT = 203,
+ IDH_REQ_RAS_CPER_DUMP = 204,
+ IDH_REQ_RAS_BAD_PAGES = 205,
+ IDH_REQ_RAS_CHK_CRITI = 206
};
enum idh_event {
@@ -49,6 +55,15 @@ enum idh_event {
IDH_FAIL,
IDH_QUERY_ALIVE,
IDH_REQ_GPU_INIT_DATA_READY,
+ IDH_RAS_POISON_READY,
+ IDH_PF_SOFT_FLR_NOTIFICATION,
+ IDH_RAS_ERROR_DETECTED,
+ IDH_RAS_ERROR_COUNT_READY = 11,
+ IDH_RAS_CPER_DUMP_READY = 14,
+ IDH_RAS_BAD_PAGES_READY = 15,
+ IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
+ IDH_UNRECOV_ERR_NOTIFICATION = 17,
+ IDH_REQ_RAS_CHK_CRITI_READY = 18,
IDH_TEXT_MESSAGE = 255,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index aef9d059ae52..e1d63bed84bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -42,6 +42,8 @@
#include "smu/smu_7_1_3_d.h"
#include "mxgpu_vi.h"
+#include "amdgpu_reset.h"
+
/* VI golden setting */
static const u32 xgpu_fiji_mgcg_cgcg_init[] = {
mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
@@ -332,7 +334,7 @@ static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev)
break;
}
mdelay(1);
- timeout -=1;
+ timeout -= 1;
reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
}
@@ -513,15 +515,18 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- /* wait until RCV_MSG become 3 */
- if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
- pr_err("failed to receive FLR_CMPL\n");
- return;
- }
-
/* Trigger recovery due to world switch failure */
- if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_device_gpu_recover(adev, NULL);
+ if (amdgpu_device_should_recover_gpu(adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -544,14 +549,17 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
{
int r;
- /* trigger gpu-reset by hypervisor only if TDR disbaled */
+ /* trigger gpu-reset by hypervisor only if TDR disabled */
if (!amdgpu_gpu_recovery) {
/* see what event we get */
r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* only handle FLR_NOTIFY now */
if (!r)
- schedule_work(&adev->virt.flr_work);
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index f4e4040bbd25..4cd325149b63 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -107,7 +107,7 @@ force_update_wptr_for_self_int(struct amdgpu_device *adev,
{
u32 ih_cntl, ih_rb_cntl;
- if (adev->asic_type < CHIP_SIENNA_CICHLID)
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) < IP_VERSION(5, 0, 3))
return;
ih_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_CNTL2);
@@ -120,11 +120,23 @@ force_update_wptr_for_self_int(struct amdgpu_device *adev,
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_USED_INT_THRESHOLD, threshold);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_USED_INT_THRESHOLD, threshold);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
+
WREG32_SOC15(OSSSYS, 0, mmIH_CNTL2, ih_cntl);
}
@@ -148,10 +160,17 @@ static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
- WREG32(ih_regs->ih_rb_cntl, tmp);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
if (enable) {
ih->enabled = true;
@@ -257,11 +276,17 @@ static int navi10_ih_enable_ring(struct amdgpu_device *adev,
tmp = navi10_ih_rb_cntl(ih, tmp);
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
- if (ih == &adev->irq.ih1) {
- tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ if (ih == &adev->irq.ih1)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
}
- WREG32(ih_regs->ih_rb_cntl, tmp);
if (ih == &adev->irq.ih) {
/* set the ih ring 0 writeback address whether it's enabled or not */
@@ -293,7 +318,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
{
struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
u32 ih_chicken;
- u32 tmp;
int ret;
int i;
@@ -306,11 +330,10 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) {
if (ih[0]->use_bus_addr) {
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) {
+ case IP_VERSION(5, 0, 3):
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 1):
ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid);
ih_chicken = REG_SET_FIELD(ih_chicken,
IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
@@ -338,15 +361,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
ih[0]->doorbell_index);
- tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
- tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
- CLIENT18_IS_STORM_CLIENT, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp);
-
- tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL);
- tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp);
-
pci_set_master(adev->pdev);
/* enable interrupts */
@@ -395,12 +409,21 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- wptr = le32_to_cpu(*ih->wptr_cpu);
- ih_regs = &ih->ih_regs;
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
+ /* Only ring0 supports writeback. On other rings fall back
+ * to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
+ */
+ wptr = le32_to_cpu(*ih->wptr_cpu);
- if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
- goto out;
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ }
+ ih_regs = &ih->ih_regs;
+
+ /* Double check that the overflow wasn't already cleared. */
wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -411,14 +434,19 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catch up.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
@@ -462,6 +490,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
@@ -488,15 +519,11 @@ static int navi10_ih_self_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t wptr = cpu_to_le32(entry->src_data[0]);
-
switch (entry->ring_id) {
case 1:
- *adev->irq.ih1.wptr_cpu = wptr;
schedule_work(&adev->irq.ih1_work);
break;
case 2:
- *adev->irq.ih2.wptr_cpu = wptr;
schedule_work(&adev->irq.ih2_work);
break;
default: break;
@@ -514,19 +541,19 @@ static void navi10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &navi10_ih_self_irq_funcs;
}
-static int navi10_ih_early_init(void *handle)
+static int navi10_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_set_interrupt_funcs(adev);
navi10_ih_set_self_irq_funcs(adev);
return 0;
}
-static int navi10_ih_sw_init(void *handle)
+static int navi10_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -543,7 +570,7 @@ static int navi10_ih_sw_init(void *handle)
use_bus_addr = false;
else
use_bus_addr = true;
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;
@@ -556,7 +583,7 @@ static int navi10_ih_sw_init(void *handle)
/* initialize ih control registers offset */
navi10_ih_init_register_offset(adev);
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
if (r)
return r;
@@ -565,67 +592,52 @@ static int navi10_ih_sw_init(void *handle)
return r;
}
-static int navi10_ih_sw_fini(void *handle)
+static int navi10_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_irq_fini_sw(adev);
return 0;
}
-static int navi10_ih_hw_init(void *handle)
+static int navi10_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = navi10_ih_irq_init(adev);
- if (r)
- return r;
+ struct amdgpu_device *adev = ip_block->adev;
- return 0;
+ return navi10_ih_irq_init(adev);
}
-static int navi10_ih_hw_fini(void *handle)
+static int navi10_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- navi10_ih_irq_disable(adev);
+ navi10_ih_irq_disable(ip_block->adev);
return 0;
}
-static int navi10_ih_suspend(void *handle)
+static int navi10_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return navi10_ih_hw_fini(adev);
+ return navi10_ih_hw_fini(ip_block);
}
-static int navi10_ih_resume(void *handle)
+static int navi10_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return navi10_ih_hw_init(adev);
+ return navi10_ih_hw_init(ip_block);
}
-static bool navi10_ih_is_idle(void *handle)
+static bool navi10_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int navi10_ih_wait_for_idle(void *handle)
+static int navi10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int navi10_ih_soft_reset(void *handle)
+static int navi10_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -652,40 +664,35 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
}
-
- return;
}
-static int navi10_ih_set_clockgating_state(void *handle,
+static int navi10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
-static int navi10_ih_set_powergating_state(void *handle,
+static int navi10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void navi10_ih_get_clockgating_state(void *handle, u32 *flags)
+static void navi10_ih_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
-
- return;
}
static const struct amd_ip_funcs navi10_ih_ip_funcs = {
.name = "navi10_ih",
.early_init = navi10_ih_early_init,
- .late_init = NULL,
.sw_init = navi10_ih_sw_init,
.sw_fini = navi10_ih_sw_fini,
.hw_init = navi10_ih_hw_init,
@@ -703,6 +710,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = {
static const struct amdgpu_ih_funcs navi10_ih_funcs = {
.get_wptr = navi10_ih_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = navi10_ih_set_rptr
};
@@ -712,8 +720,7 @@ static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &navi10_ih_funcs;
}
-const struct amdgpu_ip_block_version navi10_ih_ip_block =
-{
+const struct amdgpu_ip_block_version navi10_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 5,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
deleted file mode 100644
index 88efaecf9f70..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#include "amdgpu.h"
-#include "nv.h"
-
-#include "soc15_common.h"
-#include "navi10_ip_offset.h"
-
-int navi10_reg_base_init(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0 ; i < MAX_INSTANCE ; ++i) {
- adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
- adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
- adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
- adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
- adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
- adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
- adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
- adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
- adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i]));
- adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
- adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
- adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
- adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
- }
-
- return 0;
-}
-
-
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
index a5b60c9a2418..c88284ff92d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -68,6 +68,7 @@
#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define SDMA_SUBOP_VM_INVALIDATION 4
#define HEADER_AGENT_DISPATCH 4
#define HEADER_BARRIER 5
#define SDMA_OP_AQL_COPY 0
@@ -4041,6 +4042,69 @@
/*
+** Definitions for SDMA_PKT_VM_INVALIDATION packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
+#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
+
+/*define for gfx_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16
+#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift)
+
+/*define for mm_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24
+#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift)
+
+/*define for INVALIDATEREQ word*/
+/*define for invalidatereq field*/
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
+
+/*define for ADDRESSRANGELO word*/
+/*define for addressrangelo field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
+
+/*define for ADDRESSRANGEHI word*/
+/*define for invalidateack field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
+
+/*define for addressrangehi field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
+
+/*define for reserved field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
+
+
+/*
** Definitions for SDMA_PKT_ATOMIC packet
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
deleted file mode 100644
index a786d159e5e9..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#include "amdgpu.h"
-#include "nv.h"
-
-#include "soc15_common.h"
-#include "navi12_ip_offset.h"
-
-int navi12_reg_base_init(struct amdgpu_device *adev)
-{
- /* HW has more IP blocks, only initialized the blocks needed by driver */
- uint32_t i;
- for (i = 0 ; i < MAX_INSTANCE ; ++i) {
- adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
- adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
- adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
- adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
- adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
- adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
- adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
- adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
- adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
- adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
- adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
- adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
- adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
- }
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
deleted file mode 100644
index 4ea1e8fbb601..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#include "amdgpu.h"
-#include "nv.h"
-
-#include "soc15_common.h"
-#include "navi14_ip_offset.h"
-
-int navi14_reg_base_init(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0 ; i < MAX_INSTANCE ; ++i) {
- adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
- adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
- adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
- adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
- adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
- adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
- adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
- adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
- adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
- adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
- adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
- adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
- adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
- }
-
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
new file mode 100644
index 000000000000..9b4025c39e44
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
@@ -0,0 +1,554 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbif_v6_3_1.h"
+
+#include "nbif/nbif_6_3_1_offset.h"
+#include "nbif/nbif_6_3_1_sh_mask.h"
+#include "pcie/pcie_6_1_0_offset.h"
+#include "pcie/pcie_6_1_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbif_v6_3_1_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbif_v6_3_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+}
+
+static u32 nbif_v6_3_1_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev,
+ int instance, bool use_doorbell,
+ int doorbell_index,
+ int doorbell_size)
+{
+ if (instance == 0) {
+ u32 doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWID,
+ 0xe);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ doorbell_size);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE,
+ 0x3);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range);
+ }
+}
+
+static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index,
+ int instance)
+{
+ u32 doorbell_range;
+
+ if (instance)
+ doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL);
+ else
+ doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWID,
+ instance ? 0x7 : 0x4);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 8);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE,
+ instance ? 0x7 : 0x4);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 0);
+
+ if (instance)
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range);
+ else
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range);
+}
+
+static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007);
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d);
+}
+
+static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void
+nbif_v6_3_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE,
+ 0x1);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID,
+ 0x0);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 2);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x0);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range);
+}
+
+static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /*
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static void
+nbif_v6_3_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void
+nbif_v6_3_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void
+nbif_v6_3_1_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+}
+
+static u32 nbif_v6_3_1_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+
+static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbif_v6_3_1_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2);
+ data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
+}
+
+static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev)
+{
+ u32 data, rom_offset;
+
+ data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL);
+ rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET);
+
+ return rom_offset;
+}
+
+#ifdef CONFIG_PCIEASPM
+static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+ u16 devctl2;
+
+ def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data = 0x35EB;
+ data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
+ data &= ~RCC_STRAP0_RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
+
+ pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2);
+
+ if (adev->pdev->ltr_path == (devctl2 & PCI_EXP_DEVCTL2_LTR_EN))
+ return;
+
+ if (adev->pdev->ltr_path)
+ pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
+ else
+ pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
+}
+#endif
+
+static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+ u16 devctl2, ltr;
+
+ def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data &= ~RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2);
+ data = def = devctl2;
+ data &= ~PCI_EXP_DEVCTL2_LTR_EN;
+ if (def != data)
+ pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2, (u16)data);
+
+ ltr = pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_LTR);
+
+ if (ltr) {
+ pci_write_config_dword(adev->pdev, ltr + PCI_LTR_MAX_SNOOP_LAT, 0x10011001);
+ }
+
+#if 0
+ /* regPSWUSP0_PCIE_LC_CNTL2 should be replace by PCIE_LC_CNTL2 or someone else ? */
+ def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);
+#endif
+ def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4);
+ data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4, data);
+
+ def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
+ data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);
+
+ nbif_v6_3_1_program_ltr(adev);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL);
+ data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data);
+#endif
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbif_v6_3_1_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
+const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = {
+ .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset,
+ .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset,
+ .get_rev_id = nbif_v6_3_1_get_rev_id,
+ .mc_access_enable = nbif_v6_3_1_mc_access_enable,
+ .get_memsize = nbif_v6_3_1_get_memsize,
+ .sdma_doorbell_range = nbif_v6_3_1_sdma_doorbell_range,
+ .vcn_doorbell_range = nbif_v6_3_1_vcn_doorbell_range,
+ .gc_doorbell_init = nbif_v6_3_1_gc_doorbell_init,
+ .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbif_v6_3_1_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbif_v6_3_1_get_clockgating_state,
+ .ih_control = nbif_v6_3_1_ih_control,
+ .init_registers = nbif_v6_3_1_init_registers,
+ .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers,
+ .get_rom_offset = nbif_v6_3_1_get_rom_offset,
+ .program_aspm = nbif_v6_3_1_program_aspm,
+ .set_reg_remap = nbif_v6_3_1_set_reg_remap,
+};
+
+
+static int nbif_v6_3_1_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+
+ return 0;
+}
+
+static int nbif_v6_3_1_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to bif ring. since bif ring is not enabled, just leave process callback
+ * as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbif_v6_3_1_ras_err_event_athub_irq_funcs = {
+ .set = nbif_v6_3_1_set_ras_err_event_athub_irq_state,
+ .process = nbif_v6_3_1_process_err_event_athub_irq,
+};
+
+static void nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+static int nbif_v6_3_1_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbif_v6_3_1_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt
+ * nbif v6_3_1 uses the same irq source as nbio v7_4
+ */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+struct amdgpu_nbio_ras nbif_v6_3_1_ras = {
+ .handle_ras_err_event_athub_intr_no_bifring =
+ nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_err_event_athub_interrupt =
+ nbif_v6_3_1_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
new file mode 100644
index 000000000000..3afec715a9fe
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V6_3_1_H__
+#define __NBIO_V6_3_1_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs;
+extern struct amdgpu_nbio_ras nbif_v6_3_1_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 05ddec7ba7e2..04041b398781 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -21,13 +21,13 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v2_3.h"
#include "nbio/nbio_2_3_default.h"
#include "nbio/nbio_2_3_offset.h"
#include "nbio/nbio_2_3_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
+#include <linux/device.h>
#include <linux/pci.h>
#define smnPCIE_CONFIG_CNTL 0x11180044
@@ -51,6 +51,18 @@
#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8
#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
+#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288
+
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L /* Don't use. Firmware uses this bit internally */
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK 0x00040000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK 0x00080000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK 0x00100000L
+
static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -218,8 +230,11 @@ static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
def = data = RREG32_PCIE(smnCPM_CONTROL);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
+ if (enable) {
data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
@@ -244,8 +259,11 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
def = data = RREG32_PCIE(smnPCIE_CNTL2);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ if (enable) {
data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
PCIE_CNTL2__MST_MEM_LS_EN_MASK |
PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
@@ -260,7 +278,7 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -323,8 +341,8 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
}
#define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1
-#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x00000009 // 1=1us, 9=1ms
-#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 4ms
+#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x0000000A // 1=1us, 9=1ms, 10=4ms
+#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 400ms
static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
bool enable)
@@ -339,14 +357,14 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
- if (pci_is_thunderbolt_attached(adev->pdev))
+ if (dev_is_removable(&adev->pdev->dev))
data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
else
data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
} else {
- /* Disbale ASPM L1 */
+ /* Disable ASPM L1 */
data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
/* Disable ASPM TxL0s */
data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
@@ -358,6 +376,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
WREG32_PCIE(smnPCIE_LC_CNTL, data);
}
+#ifdef CONFIG_PCIEASPM
static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -379,9 +398,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -437,7 +458,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v2_3_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v2_3_program_ltr(adev);
def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -451,9 +475,12 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP5, data);
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
- data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
- data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
- data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT;
+ data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+ if (dev_is_removable(&adev->pdev->dev))
+ data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ else
+ data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL, data);
@@ -461,6 +488,78 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+}
+
+static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+ uint32_t link_width = 0;
+
+ if (!((adev->asic_type >= CHIP_NAVI10) &&
+ (adev->asic_type <= CHIP_NAVI12)))
+ return;
+
+ reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
+ link_width = (reg_data & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
+ >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
+
+ /*
+ * Program PCIE_LC_CNTL6.LC_SPC_MODE_8GT to 0x2 (4 symbols per clock data)
+ * if link_width is 0x3 (x4)
+ */
+ if (0x3 == link_width) {
+ reg_data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+ reg_data &= ~PCIE_LC_CNTL6__LC_SPC_MODE_8GT_MASK;
+ reg_data |= (0x2 << PCIE_LC_CNTL6__LC_SPC_MODE_8GT__SHIFT);
+ WREG32_PCIE(smnPCIE_LC_CNTL6, reg_data);
+ }
+}
+
+static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+
+ if (adev->asic_type != CHIP_NAVI10)
+ return;
+
+ reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
+ reg_data |= PCIE_LC_LINK_WIDTH_CNTL__LC_L1_RECONFIG_EN_MASK;
+ WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data);
+}
+
+static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
+{
+ uint32_t reg, reg_data;
+
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) != IP_VERSION(3, 3, 0))
+ return;
+
+ reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);
+
+ /* Clear Interrupt Status
+ */
+ if ((reg & BIF_RB_CNTL__RB_ENABLE_MASK) == 0) {
+ reg = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+ if (reg & BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_STATUS_MASK) {
+ reg_data = 1 << BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_CLEAR__SHIFT;
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, reg_data);
+ }
+ }
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v2_3_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
@@ -484,4 +583,8 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
.enable_aspm = nbio_v2_3_enable_aspm,
.program_aspm = nbio_v2_3_program_aspm,
+ .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
+ .apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa,
+ .clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt,
+ .set_reg_remap = nbio_v2_3_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
new file mode 100644
index 000000000000..f89e5f40e1a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -0,0 +1,634 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v4_3.h"
+
+#include "nbio/nbio_4_3_0_offset.h"
+#include "nbio/nbio_4_3_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void nbio_v4_3_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v4_3_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v4_3_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+}
+
+static u32 nbio_v4_3_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v4_3_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ if (instance == 0) {
+ u32 doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWID,
+ 0xe);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ doorbell_size);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE,
+ 0x3);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL, doorbell_range);
+ }
+}
+
+static void nbio_v4_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 doorbell_range;
+
+ if (instance)
+ doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL);
+ else
+ doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWID,
+ instance ? 0x7 : 0x4);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 8);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE,
+ instance ? 0x7 : 0x4);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 0);
+
+ if (instance)
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL, doorbell_range);
+ else
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL, doorbell_range);
+}
+
+static void nbio_v4_3_gc_doorbell_init(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_0_CTRL, 0x30000007);
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d);
+}
+
+static void nbio_v4_3_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void nbio_v4_3_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+
+static void nbio_v4_3_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE,
+ 0x1);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID,
+ 0x0);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 2);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x0);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range);
+}
+
+static void nbio_v4_3_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /*
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
+ if (enable) {
+ data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regCPM_CONTROL, data);
+}
+
+static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ /* TODO: need update in future */
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
+ if (enable) {
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ } else {
+ data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_CNTL2, data);
+}
+
+static void nbio_v4_3_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static u32 nbio_v4_3_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v4_3_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v4_3_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+
+static u32 nbio_v4_3_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(4, 3, 0)) {
+ uint32_t data;
+
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2);
+ data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
+ }
+}
+
+static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
+{
+ u32 data, rom_offset;
+
+ data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL);
+ rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET);
+
+ return rom_offset;
+}
+
+#ifdef CONFIG_PCIEASPM
+static void nbio_v4_3_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data = 0x35EB;
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ if (adev->pdev->ltr_path)
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ else
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+
+static void nbio_v4_3_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ if (!(amdgpu_ip_version(adev, PCIE_HWIP, 0) == IP_VERSION(7, 4, 0)) &&
+ !(amdgpu_ip_version(adev, PCIE_HWIP, 0) == IP_VERSION(7, 6, 0)))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4);
+ data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
+ data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);
+
+ nbio_v4_3_program_ltr(adev);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
+ data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
+#endif
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v4_3_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
+const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
+ .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v4_3_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v4_3_get_pcie_data_offset,
+ .get_rev_id = nbio_v4_3_get_rev_id,
+ .mc_access_enable = nbio_v4_3_mc_access_enable,
+ .get_memsize = nbio_v4_3_get_memsize,
+ .sdma_doorbell_range = nbio_v4_3_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v4_3_vcn_doorbell_range,
+ .gc_doorbell_init = nbio_v4_3_gc_doorbell_init,
+ .enable_doorbell_aperture = nbio_v4_3_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v4_3_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v4_3_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v4_3_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v4_3_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v4_3_get_clockgating_state,
+ .ih_control = nbio_v4_3_ih_control,
+ .init_registers = nbio_v4_3_init_registers,
+ .remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
+ .get_rom_offset = nbio_v4_3_get_rom_offset,
+ .program_aspm = nbio_v4_3_program_aspm,
+ .set_reg_remap = nbio_v4_3_set_reg_remap,
+};
+
+
+static void nbio_v4_3_sriov_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+}
+
+static void nbio_v4_3_sriov_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+}
+
+static void nbio_v4_3_sriov_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+}
+
+static void nbio_v4_3_sriov_gc_doorbell_init(struct amdgpu_device *adev)
+{
+}
+
+const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs = {
+ .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v4_3_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v4_3_get_pcie_data_offset,
+ .get_rev_id = nbio_v4_3_get_rev_id,
+ .mc_access_enable = nbio_v4_3_mc_access_enable,
+ .get_memsize = nbio_v4_3_get_memsize,
+ .sdma_doorbell_range = nbio_v4_3_sriov_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v4_3_sriov_vcn_doorbell_range,
+ .gc_doorbell_init = nbio_v4_3_sriov_gc_doorbell_init,
+ .enable_doorbell_aperture = nbio_v4_3_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v4_3_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v4_3_sriov_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v4_3_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v4_3_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v4_3_get_clockgating_state,
+ .ih_control = nbio_v4_3_ih_control,
+ .init_registers = nbio_v4_3_init_registers,
+ .remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
+ .get_rom_offset = nbio_v4_3_get_rom_offset,
+ .set_reg_remap = nbio_v4_3_set_reg_remap,
+};
+
+static int nbio_v4_3_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+
+ return 0;
+}
+
+static int nbio_v4_3_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to bif ring. since bif ring is not enabled, just leave process callback
+ * as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v4_3_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v4_3_set_ras_err_event_athub_irq_state,
+ .process = nbio_v4_3_process_err_event_athub_irq,
+};
+
+static void nbio_v4_3_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_int_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+static int nbio_v4_3_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v4_3_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt
+ * nbio v4_3 uses the same irq source as nbio v7_4 */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+struct amdgpu_nbio_ras nbio_v4_3_ras = {
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v4_3_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_err_event_athub_interrupt = nbio_v4_3_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h
new file mode 100644
index 000000000000..399037cdf4fb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V4_3_H__
+#define __NBIO_V4_3_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v4_3_funcs;
+extern const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs;
+extern struct amdgpu_nbio_ras nbio_v4_3_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 83ea063388fd..e911368c1aeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v6_1.h"
#include "nbio/nbio_6_1_default.h"
@@ -31,6 +30,28 @@
#include "vega10_enum.h"
#include <uapi/linux/kfd_ioctl.h>
+#define smnPCIE_LC_CNTL 0x11140280
+#define smnPCIE_LC_CNTL3 0x111402d4
+#define smnPCIE_LC_CNTL6 0x111402ec
+#define smnPCIE_LC_CNTL7 0x111402f0
+#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
+#define NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK 0x00001000L
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK 0x0000FFFFL
+#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK 0xFFFF0000L
+#define smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL 0x10123530
+#define smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2 0x1014008c
+#define smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP 0x10140324
+#define smnPSWUSP0_PCIE_LC_CNTL2 0x111402c4
+#define smnRCC_BIF_STRAP2 0x10123488
+#define smnRCC_BIF_STRAP3 0x1012348c
+#define smnRCC_BIF_STRAP5 0x10123494
+#define BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK 0x0400L
+#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK 0x0000FFFFL
+#define RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK 0x00004000L
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT 0x0
+#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT 0x10
+#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT 0x0
+
static void nbio_v6_1_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -188,7 +209,7 @@ static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -256,6 +277,133 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
WREG32_PCIE(smnPCIE_CI_CNTL, data);
}
+#ifdef CONFIG_PCIEASPM
+static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, 0x75EB);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP2, data);
+
+ def = data = RREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+
+static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
+ data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
+
+ def = data = RREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+ data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK |
+ PCIE_LC_CNTL6__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v6_1_program_ltr(adev);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v6_1_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
.get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset,
@@ -274,4 +422,6 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
.ih_control = nbio_v6_1_ih_control,
.init_registers = nbio_v6_1_init_registers,
.remap_hdp_registers = nbio_v6_1_remap_hdp_registers,
+ .program_aspm = nbio_v6_1_program_aspm,
+ .set_reg_remap = nbio_v6_1_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 3c00666a13e1..1569a1e934ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_0.h"
#include "nbio/nbio_7_0_default.h"
@@ -205,7 +204,7 @@ static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -271,9 +270,33 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
+#define regRCC_DEV0_EPF6_STRAP4 0xd304
+#define regRCC_DEV0_EPF6_STRAP4_BASE_IDX 5
+
static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(2, 5, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4, data);
+ break;
+ }
+}
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v7_0_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
@@ -295,4 +318,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.ih_control = nbio_v7_0_ih_control,
.init_registers = nbio_v7_0_init_registers,
.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_0_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
new file mode 100644
index 000000000000..bed5ef4d8788
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v7_11.h"
+
+#include "nbio/nbio_7_11_0_offset.h"
+#include "nbio/nbio_7_11_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void nbio_v7_11_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_11_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP1_RCC_DEV0_EPF0_STRAP0);
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v7_11_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN,
+ BIF_BX1_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX1_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, 0);
+}
+
+static u32 nbio_v7_11_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_11_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_11_vpe_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = instance == 0 ?
+ SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE1_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VPE_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VPE_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VPE_DOORBELL_RANGE,
+ SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_11_vcn_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = instance == 0 ?
+ SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE):
+ SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN1_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_11_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 reg;
+
+
+ reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN);
+ reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg);
+}
+
+static void nbio_v7_11_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+
+static void nbio_v7_11_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0,regGDC0_BIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 2);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 0);
+ }
+
+ WREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE,
+ ih_doorbell_range);
+}
+
+static void nbio_v7_11_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL2,
+ adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL);
+ /*
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_11_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_11_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_11_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX1_PCIE_INDEX2);
+}
+
+static u32 nbio_v7_11_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX1_PCIE_DATA2);
+}
+
+static u32 nbio_v7_11_get_pcie_port_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_RSMU_INDEX);
+}
+
+static u32 nbio_v7_11_get_pcie_port_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
+ data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
+ break;
+ }
+}
+
+static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL);
+ if (enable) {
+ data |= (BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL, data);
+}
+
+static void nbio_v7_11_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2);
+ if (enable)
+ data |= BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1);
+ if (enable) {
+ data |= (BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1, data);
+}
+
+static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL);
+ if (data & BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2);
+ if (data & BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+#define MMIO_REG_HOLE_OFFSET 0x44000
+
+static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_11_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_11_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_11_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_11_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_11_get_pcie_data_offset,
+ .get_pcie_port_index_offset = nbio_v7_11_get_pcie_port_index_offset,
+ .get_pcie_port_data_offset = nbio_v7_11_get_pcie_port_data_offset,
+ .get_rev_id = nbio_v7_11_get_rev_id,
+ .mc_access_enable = nbio_v7_11_mc_access_enable,
+ .get_memsize = nbio_v7_11_get_memsize,
+ .sdma_doorbell_range = nbio_v7_11_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_11_vcn_doorbell_range,
+ .vpe_doorbell_range = nbio_v7_11_vpe_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_11_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_11_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_11_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_11_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_11_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_11_get_clockgating_state,
+ .ih_control = nbio_v7_11_ih_control,
+ .init_registers = nbio_v7_11_init_registers,
+ .remap_hdp_registers = nbio_v7_11_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_11_set_reg_remap,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h
new file mode 100644
index 000000000000..9d8258ed3f0a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_11_H__
+#define __NBIO_V7_11_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_11_funcs;
+extern const struct amdgpu_nbio_ras_funcs nbio_v7_11_ras_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
index 598ce0e93627..acc5f363684a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -21,13 +21,31 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_2.h"
#include "nbio/nbio_7_2_0_offset.h"
#include "nbio/nbio_7_2_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
+#define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC 0x0015
+#define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC_BASE_IDX 2
+#define regBIF_BX0_BIF_FB_EN_YC 0x0100
+#define regBIF_BX0_BIF_FB_EN_YC_BASE_IDX 2
+#define regBIF1_PCIE_MST_CTRL_3 0x4601c6
+#define regBIF1_PCIE_MST_CTRL_3_BASE_IDX 5
+#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT \
+ 0x1b
+#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT \
+ 0x1c
+#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK \
+ 0x08000000L
+#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK \
+ 0x30000000L
+#define regBIF1_PCIE_TX_POWER_CTRL_1 0x460187
+#define regBIF1_PCIE_TX_POWER_CTRL_1_BASE_IDX 5
+#define BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L
+#define BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L
+
static void nbio_v7_2_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
@@ -38,7 +56,18 @@ static void nbio_v7_2_remap_hdp_registers(struct amdgpu_device *adev)
static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev)
{
- u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ u32 tmp;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC);
+ break;
+ default:
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ break;
+ }
tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@@ -48,12 +77,26 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev)
static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable)
{
- if (enable)
- WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
- BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
- BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
- else
- WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, 0);
+ break;
+ default:
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+ break;
+ }
}
static u32 nbio_v7_2_get_memsize(struct amdgpu_device *adev)
@@ -92,13 +135,13 @@ static void nbio_v7_2_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range,
- GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
- doorbell_index);
+ GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range,
- GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
} else {
doorbell_range = REG_SET_FIELD(doorbell_range,
- GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
}
WREG32_PCIE_PORT(reg, doorbell_range);
@@ -123,22 +166,22 @@ static void nbio_v7_2_enable_doorbell_selfring_aperture(struct amdgpu_device *ad
if (enable) {
tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
- DOORBELL_SELFRING_GPA_APER_EN, 1) |
- REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
- DOORBELL_SELFRING_GPA_APER_MODE, 1) |
- REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
- DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
WREG32_SOC15(NBIO, 0,
- regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
- lower_32_bits(adev->doorbell.base));
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
WREG32_SOC15(NBIO, 0,
- regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
- upper_32_bits(adev->doorbell.base));
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
}
WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
- tmp);
+ tmp);
}
@@ -218,23 +261,51 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
- def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2));
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
- data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
- PCIE_CNTL2__MST_MEM_LS_EN_MASK |
- PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
- } else {
- data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
- PCIE_CNTL2__MST_MEM_LS_EN_MASK |
- PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2));
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data);
+
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0,
+ regBIF1_PCIE_TX_POWER_CTRL_1));
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ data |= (BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ else
+ data &= ~(BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_TX_POWER_CTRL_1),
+ data);
+ break;
+ default:
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2));
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ else
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data);
+ break;
}
-
- if (def != data)
- WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data);
}
static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -297,14 +368,54 @@ const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = {
static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3));
+ data = REG_SET_FIELD(data, BIF1_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, BIF1_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data);
+ break;
+ default:
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL));
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), data);
+ break;
+ }
- def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL));
- data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
- data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 1):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2);
+ data &= ~RCC_DEV2_EPF0_STRAP2__STRAP_NO_SOFT_RESET_DEV2_F0_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data);
+ break;
+ }
+}
- if (def != data)
- WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL),
- data);
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v7_2_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
@@ -328,4 +439,5 @@ const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
.ih_control = nbio_v7_2_ih_control,
.init_registers = nbio_v7_2_init_registers,
.remap_hdp_registers = nbio_v7_2_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_2_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 4bc1d1434065..860bc5cb03c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_4.h"
#include "amdgpu_ras.h"
@@ -31,18 +30,40 @@
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include <uapi/linux/kfd_ioctl.h>
+#define smnPCIE_LC_CNTL 0x11140280
+#define smnPCIE_LC_CNTL3 0x111402d4
+#define smnPCIE_LC_CNTL6 0x111402ec
+#define smnPCIE_LC_CNTL7 0x111402f0
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
+#define smnRCC_BIF_STRAP3 0x1012348c
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK 0x0000FFFFL
+#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK 0xFFFF0000L
+#define smnRCC_BIF_STRAP5 0x10123494
+#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK 0x0000FFFFL
+#define smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2 0x1014008c
+#define BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK 0x0400L
+#define smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP 0x10140324
+#define smnPSWUSP0_PCIE_LC_CNTL2 0x111402c4
+#define smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL 0x10123538
+#define smnRCC_BIF_STRAP2 0x10123488
+#define RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK 0x00004000L
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT 0x0
+#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT 0x10
+#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT 0x0
/*
* These are nbio v7_4_1 registers mask. Temporarily define these here since
* nbio v7_4_1 header is incomplete.
*/
-#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L /* Don't use. Firmware uses this bit internally */
#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK 0x00040000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK 0x00080000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK 0x00100000L
#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01dc
#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
@@ -52,6 +73,28 @@
#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL
#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L
+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL
+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L
+
+#define mmBIF_MMSCH1_DOORBELL_RANGE_ALDE 0x01d8
+#define mmBIF_MMSCH1_DOORBELL_RANGE_ALDE_BASE_IDX 2
+//BIF_MMSCH1_DOORBELL_ALDE_RANGE
+#define BIF_MMSCH1_DOORBELL_RANGE_ALDE__OFFSET__SHIFT 0x2
+#define BIF_MMSCH1_DOORBELL_RANGE_ALDE__SIZE__SHIFT 0x10
+#define BIF_MMSCH1_DOORBELL_RANGE_ALDE__OFFSET_MASK 0x00000FFCL
+#define BIF_MMSCH1_DOORBELL_RANGE_ALDE__SIZE_MASK 0x001F0000L
+
+#define mmRCC_DEV0_EPF0_STRAP0_ALDE 0x0015
+#define mmRCC_DEV0_EPF0_STRAP0_ALDE_BASE_IDX 2
+
+#define mmBIF_DOORBELL_INT_CNTL_ALDE 0x00fe
+#define mmBIF_DOORBELL_INT_CNTL_ALDE_BASE_IDX 2
+#define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE__SHIFT 0x18
+#define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE_MASK 0x01000000L
+
+#define mmBIF_INTR_CNTL_ALDE 0x0101
+#define mmBIF_INTR_CNTL_ALDE_BASE_IDX 2
+
static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
@@ -65,7 +108,12 @@ static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
{
- u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+ u32 tmp;
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_ALDE);
+ else
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@@ -92,10 +140,10 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan
{
u32 reg, doorbell_range;
- if (instance < 2)
+ if (instance < 2) {
reg = instance +
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
- else
+ } else {
/*
* These registers address of SDMA2~7 is not consecutive
* from SDMA0~1. Need plus 4 dwords offset.
@@ -103,9 +151,19 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan
* BIF_SDMA0_DOORBELL_RANGE: 0x3bc0
* BIF_SDMA1_DOORBELL_RANGE: 0x3bc4
* BIF_SDMA2_DOORBELL_RANGE: 0x3bd8
+ * BIF_SDMA4_DOORBELL_RANGE:
+ * ARCTURUS: 0x3be0
+ * ALDEBARAN: 0x3be4
*/
- reg = instance + 0x4 +
- SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
+ if (adev->asic_type == CHIP_ALDEBARAN && instance == 4)
+ reg = instance + 0x4 + 0x1 +
+ SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_SDMA0_DOORBELL_RANGE);
+ else
+ reg = instance + 0x4 +
+ SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_SDMA0_DOORBELL_RANGE);
+ }
doorbell_range = RREG32(reg);
@@ -124,9 +182,12 @@ static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
u32 reg;
u32 doorbell_range;
- if (instance)
- reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE);
- else
+ if (instance) {
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE_ALDE);
+ else
+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE);
+ } else
reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
doorbell_range = RREG32(reg);
@@ -176,7 +237,7 @@ static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev,
if (use_doorbell) {
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
- ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 4);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 8);
} else
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
@@ -211,7 +272,7 @@ static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -275,36 +336,53 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
- .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK,
- .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
- .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
- .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
- .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
- .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
};
static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
-
+ uint32_t baco_cntl;
+
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4) &&
+ !amdgpu_sriov_vf(adev)) {
+ baco_cntl = RREG32_SOC15(NBIO, 0, mmBACO_CNTL);
+ if (baco_cntl &
+ (BACO_CNTL__BACO_DUMMY_EN_MASK | BACO_CNTL__BACO_EN_MASK)) {
+ baco_cntl &= ~(BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BACO_CNTL__BACO_EN_MASK);
+ dev_dbg(adev->dev, "Unsetting baco dummy mode %x",
+ baco_cntl);
+ WREG32_SOC15(NBIO, 0, mmBACO_CNTL, baco_cntl);
+ }
+ }
}
static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
{
uint32_t bif_doorbell_intr_cntl;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_err_data err_data;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+ if (amdgpu_ras_error_data_init(&err_data))
+ return;
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE);
+ else
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+
if (REG_GET_FIELD(bif_doorbell_intr_cntl,
BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
/* driver has to clear the interrupt status when bif ring is disabled */
bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
BIF_DOORBELL_INT_CNTL,
RAS_CNTLR_INTERRUPT_CLEAR, 1);
- WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE, bif_doorbell_intr_cntl);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
- if (!ras->disable_ras_err_cnt_harvest) {
+ if (ras && !ras->disable_ras_err_cnt_harvest && obj) {
/*
* clear error status after ras_controller_intr
* according to hw team and count ue number
@@ -318,16 +396,15 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
if (err_data.ce_count)
dev_info(adev->dev, "%ld correctable hardware "
- "errors detected in %s block, "
- "no user action is needed.\n",
+ "errors detected in %s block\n",
obj->err_data.ce_count,
- adev->nbio.ras_if->name);
+ get_ras_block_str(adev->nbio.ras_if));
if (err_data.ue_count)
dev_info(adev->dev, "%ld uncorrectable hardware "
"errors detected in %s block\n",
obj->err_data.ue_count,
- adev->nbio.ras_if->name);
+ get_ras_block_str(adev->nbio.ras_if));
}
dev_info(adev->dev, "RAS controller interrupt triggered "
@@ -336,22 +413,32 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
/* ras_controller_int is dedicated for nbif ras error,
* not the global interrupt for sync flood
*/
- amdgpu_ras_reset_gpu(adev);
+ amdgpu_ras_global_ras_isr(adev);
}
+
+ amdgpu_ras_error_data_fini(&err_data);
}
static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
{
uint32_t bif_doorbell_intr_cntl;
- bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE);
+ else
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+
if (REG_GET_FIELD(bif_doorbell_intr_cntl,
BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
/* driver has to clear the interrupt status when bif ring is disabled */
bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
BIF_DOORBELL_INT_CNTL,
RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
- WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE, bif_doorbell_intr_cntl);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
amdgpu_ras_global_ras_isr(adev);
}
@@ -369,14 +456,23 @@ static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev,
*/
uint32_t bif_intr_cntl;
- bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE);
+ else
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
if (state == AMDGPU_IRQ_STATE_ENABLE) {
/* set interrupt vector select bit to 0 to select
* vetcor 1 for bare metal case */
bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
BIF_INTR_CNTL,
RAS_INTR_VEC_SEL, 0);
- WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
}
return 0;
@@ -405,14 +501,22 @@ static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *ade
*/
uint32_t bif_intr_cntl;
- bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE);
+ else
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
if (state == AMDGPU_IRQ_STATE_ENABLE) {
/* set interrupt vector select bit to 0 to select
* vetcor 1 for bare metal case */
bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
BIF_INTR_CNTL,
RAS_INTR_VEC_SEL, 0);
- WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
}
return 0;
@@ -475,7 +579,9 @@ static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *a
return r;
}
-#define smnPARITY_ERROR_STATUS_UNCORR_GRP2 0x13a20030
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2 0x13a20030
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2_ALDE 0x13b20030
+#define smnRAS_GLOBAL_STATUS_LO_ALDE 0x13b20020
static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
@@ -484,12 +590,20 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
uint32_t corr, fatal, non_fatal;
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
- global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO_ALDE);
+ else
+ global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO);
+
corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr);
fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal);
non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO,
ParityErrNonFatal);
- parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2_ALDE);
+ else
+ parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
if (corr)
err_data->ce_count++;
@@ -498,13 +612,21 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
if (corr || fatal || non_fatal) {
central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS);
+
/* clear error status register */
- WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO_ALDE, global_sts);
+ else
+ WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
if (fatal)
+ {
/* clear parity fatal error indication field */
- WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2,
- parity_sts);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2_ALDE, parity_sts);
+ else
+ WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2, parity_sts);
+ }
if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS,
BIFL_RasContller_Intr_Recv)) {
@@ -521,10 +643,165 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev,
bool enable)
{
- WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL,
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL_ALDE,
+ DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
+ else
+ WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL,
DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
}
+const struct amdgpu_ras_block_hw_ops nbio_v7_4_ras_hw_ops = {
+ .query_ras_error_count = nbio_v7_4_query_ras_error_count,
+};
+
+struct amdgpu_nbio_ras nbio_v7_4_ras = {
+ .ras_block = {
+ .ras_comm = {
+ .name = "pcie_bif",
+ .block = AMDGPU_RAS_BLOCK__PCIE_BIF,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ },
+ .hw_ops = &nbio_v7_4_ras_hw_ops,
+ .ras_late_init = amdgpu_nbio_ras_late_init,
+ },
+ .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
+ .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
+};
+
+
+#ifdef CONFIG_PCIEASPM
+static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, 0x75EB);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP2, data);
+
+ def = data = RREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+
+static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4))
+ return;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
+ data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
+
+ def = data = RREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+ data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK |
+ PCIE_LC_CNTL6__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v7_4_program_ltr(adev);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v7_4_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
@@ -545,10 +822,6 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.ih_control = nbio_v7_4_ih_control,
.init_registers = nbio_v7_4_init_registers,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
- .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
- .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
- .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
- .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
- .query_ras_error_count = nbio_v7_4_query_ras_error_count,
- .ras_late_init = amdgpu_nbio_ras_late_init,
+ .program_aspm = nbio_v7_4_program_aspm,
+ .set_reg_remap = nbio_v7_4_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
index b1ac82872752..f27c41728822 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
@@ -28,5 +28,6 @@
extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
+extern struct amdgpu_nbio_ras nbio_v7_4_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
new file mode 100644
index 000000000000..2ee60b8746a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v7_7.h"
+
+#include "nbio/nbio_7_7_0_offset.h"
+#include "nbio/nbio_7_7_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v7_7_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN,
+ BIF_BX1_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX1_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, 0);
+}
+
+static u32 nbio_v7_7_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_SDMA0_DOORBELL_RANGE,
+ SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_7_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_7_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 reg;
+
+ reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN);
+ reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg);
+}
+
+static void nbio_v7_7_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+
+
+static void nbio_v7_7_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0,
+ regGDC0_BIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 2);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 0);
+ }
+
+ WREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE,
+ ih_doorbell_range);
+}
+
+static void nbio_v7_7_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL2,
+ adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL);
+ /*
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_7_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_7_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_7_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2);
+}
+
+static u32 nbio_v7_7_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
+}
+
+static u32 nbio_v7_7_get_pcie_port_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+
+static u32 nbio_v7_7_get_pcie_port_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_7_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3);
+ data = REG_SET_FIELD(data, BIF0_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, BIF0_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data);
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 7, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
+ break;
+ }
+}
+
+static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (enable) {
+ data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data);
+}
+
+static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (enable)
+ data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1);
+ if (enable) {
+ data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data);
+}
+
+static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v7_7_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_7_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_7_get_pcie_data_offset,
+ .get_pcie_port_index_offset = nbio_v7_7_get_pcie_port_index_offset,
+ .get_pcie_port_data_offset = nbio_v7_7_get_pcie_port_data_offset,
+ .get_rev_id = nbio_v7_7_get_rev_id,
+ .mc_access_enable = nbio_v7_7_mc_access_enable,
+ .get_memsize = nbio_v7_7_get_memsize,
+ .sdma_doorbell_range = nbio_v7_7_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_7_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_7_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_7_get_clockgating_state,
+ .ih_control = nbio_v7_7_ih_control,
+ .init_registers = nbio_v7_7_init_registers,
+ .remap_hdp_registers = nbio_v7_7_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_7_set_reg_remap,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h
new file mode 100644
index 000000000000..2a33b256ba81
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_7_H__
+#define __NBIO_V7_7_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_7_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_7_funcs;
+extern const struct amdgpu_nbio_ras_funcs nbio_v7_7_ras_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
new file mode 100644
index 000000000000..bdfd2917e3ca
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v7_9.h"
+#include "amdgpu_ras.h"
+
+#include "nbio/nbio_7_9_0_offset.h"
+#include "nbio/nbio_7_9_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define NPS_MODE_MASK 0x000000FFL
+
+static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 rev_id;
+
+ /*
+ * fetch the sub-revision field from the IP-discovery table
+ * (returns zero if the table entry is not populated).
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ rev_id = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
+ } else {
+ rev_id = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ rev_id = REG_GET_FIELD(rev_id, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0,
+ STRAP_ATI_REV_ID_DEV0_F0);
+ }
+
+ return rev_id;
+}
+
+static void nbio_v7_9_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+}
+
+static u32 nbio_v7_9_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size)
+{
+ u32 doorbell_range = 0, doorbell_ctrl = 0;
+ int aid_id, dev_inst;
+
+ dev_inst = GET_INST(SDMA0, instance);
+ aid_id = adev->sdma.instance[instance].aid_id;
+
+ if (use_doorbell == false)
+ return;
+
+ doorbell_range =
+ REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY, doorbell_index);
+ doorbell_range =
+ REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, doorbell_size);
+ doorbell_ctrl =
+ REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ doorbell_ctrl =
+ REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, doorbell_size);
+
+ switch (dev_inst % adev->sdma.num_inst_per_aid) {
+ case 0:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_1,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0xe);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0xe);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x1);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_1_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 1:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_2,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x8);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x8);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x2);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_2_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 2:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_3,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x9);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x9);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x8);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_5_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 3:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_4,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0xa);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0xa);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x9);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_6_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 doorbell_range = 0, doorbell_ctrl = 0;
+ u32 aid_id = instance;
+ u32 range_size;
+
+ if (use_doorbell) {
+ range_size = (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 5, 0)) ?
+ 0xb : 0x9;
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY,
+ range_size);
+ if (aid_id)
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ DOORBELL0_FENCE_ENABLE_ENTRY,
+ 0x4);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x4);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, range_size);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4);
+
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17,
+ aid_id, doorbell_range);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL,
+ aid_id, doorbell_ctrl);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, 0);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0);
+
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17,
+ aid_id, doorbell_range);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL,
+ aid_id, doorbell_ctrl);
+ }
+}
+
+static void nbio_v7_9_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ /* Enable to allow doorbell pass thru on pre-silicon bare-metal */
+ WREG32_SOC15(NBIO, 0, regBIFC_DOORBELL_ACCESS_EN_PF, 0xfffff);
+ WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void nbio_v7_9_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+static void nbio_v7_9_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = 0, ih_doorbell_ctrl = 0;
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY,
+ 0x8);
+
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0x8);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0);
+ }
+
+ WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_0, ih_doorbell_range);
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_3_CTRL, ih_doorbell_ctrl);
+}
+
+
+static void nbio_v7_9_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void nbio_v7_9_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void nbio_v7_9_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+}
+
+static void nbio_v7_9_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl =
+ REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl =
+ REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_9_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_9_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_9_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2);
+}
+
+static u32 nbio_v7_9_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
+}
+
+static u32 nbio_v7_9_get_pcie_index_hi_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2_HI);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+ .ref_and_mask_sdma2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK,
+ .ref_and_mask_sdma3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
+ .ref_and_mask_sdma4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
+ .ref_and_mask_sdma5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
+ .ref_and_mask_sdma6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
+ .ref_and_mask_sdma7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
+};
+
+static void nbio_v7_9_enable_doorbell_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15_PREREG(NBIO, 0, BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
+}
+
+static int nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev)
+{
+ u32 tmp, px;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_STATUS);
+ px = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_COMPUTE_STATUS,
+ PARTITION_MODE);
+
+ return px;
+}
+
+static bool nbio_v7_9_is_nps_switch_requested(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
+ tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS,
+ CHANGE_STATUE);
+
+ /* 0x8 - NPS switch requested */
+ return (tmp == 0x8);
+}
+static u32 nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev,
+ u32 *supp_modes)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
+ tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE);
+
+ if (supp_modes) {
+ *supp_modes =
+ RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_CAP);
+ }
+
+ return ffs(tmp);
+}
+
+static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
+{
+ u32 inst_mask;
+ int i;
+
+ WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
+ 0xff & ~(adev->gfx.xcc_mask));
+
+ WREG32_SOC15(NBIO, 0, regBIFC_GFX_INT_MONITOR_MASK, 0x7ff);
+
+ inst_mask = adev->aid_mask & ~1U;
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15_EXT(NBIO, i, regXCC_DOORBELL_FENCE, i,
+ XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK);
+
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ u32 baco_cntl;
+ for_each_inst(i, adev->aid_mask) {
+ baco_cntl = RREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL);
+ if (baco_cntl & (BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BIF_BX0_BACO_CNTL__BACO_EN_MASK)) {
+ baco_cntl &= ~(
+ BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BIF_BX0_BACO_CNTL__BACO_EN_MASK);
+ dev_dbg(adev->dev,
+ "Unsetting baco dummy mode %x",
+ baco_cntl);
+ WREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL,
+ baco_cntl);
+ }
+ }
+ }
+}
+
+#define MMIO_REG_HOLE_OFFSET 0x1A000
+
+static void nbio_v7_9_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(
+ NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
+ << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_9_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_9_get_pcie_data_offset,
+ .get_pcie_index_hi_offset = nbio_v7_9_get_pcie_index_hi_offset,
+ .get_rev_id = nbio_v7_9_get_rev_id,
+ .mc_access_enable = nbio_v7_9_mc_access_enable,
+ .get_memsize = nbio_v7_9_get_memsize,
+ .sdma_doorbell_range = nbio_v7_9_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_9_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_9_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_9_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_9_ih_doorbell_range,
+ .enable_doorbell_interrupt = nbio_v7_9_enable_doorbell_interrupt,
+ .update_medium_grain_clock_gating = nbio_v7_9_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_9_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_9_get_clockgating_state,
+ .ih_control = nbio_v7_9_ih_control,
+ .remap_hdp_registers = nbio_v7_9_remap_hdp_registers,
+ .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode,
+ .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
+ .is_nps_switch_requested = nbio_v7_9_is_nps_switch_requested,
+ .init_registers = nbio_v7_9_init_registers,
+ .set_reg_remap = nbio_v7_9_set_reg_remap,
+};
+
+static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+}
+
+static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+ struct ras_err_data err_data;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (amdgpu_ras_error_data_init(&err_data))
+ return;
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_CNTLR_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ if (!ras->disable_ras_err_cnt_harvest) {
+ /*
+ * clear error status after ras_controller_intr
+ * according to hw team and count ue number
+ * for query
+ */
+ nbio_v7_9_query_ras_error_count(adev, &err_data);
+
+ /* logging on error cnt and printing for awareness */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+
+ if (err_data.ce_count)
+ dev_info(adev->dev, "%ld correctable hardware "
+ "errors detected in %s block\n",
+ obj->err_data.ce_count,
+ get_ras_block_str(adev->nbio.ras_if));
+
+ if (err_data.ue_count)
+ dev_info(adev->dev, "%ld uncorrectable hardware "
+ "errors detected in %s block\n",
+ obj->err_data.ue_count,
+ get_ras_block_str(adev->nbio.ras_if));
+ }
+
+ dev_info(adev->dev, "RAS controller interrupt triggered "
+ "by NBIF error\n");
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+}
+
+static void nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+static int nbio_v7_9_set_ras_controller_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* Dummy function, there is no initialization operation in driver */
+
+ return 0;
+}
+
+static int nbio_v7_9_process_ras_controller_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for ras_controller_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static int nbio_v7_9_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* Dummy function, there is no initialization operation in driver */
+
+ return 0;
+}
+
+static int nbio_v7_9_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_controller_irq_funcs = {
+ .set = nbio_v7_9_set_ras_controller_irq_state,
+ .process = nbio_v7_9_process_ras_controller_irq,
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v7_9_set_ras_err_event_athub_irq_state,
+ .process = nbio_v7_9_process_err_event_athub_irq,
+};
+
+static int nbio_v7_9_init_ras_controller_interrupt (struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_controller_irq.funcs =
+ &nbio_v7_9_ras_controller_irq_funcs;
+ adev->nbio.ras_controller_irq.num_types = 1;
+
+ /* register ras controller interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+ &adev->nbio.ras_controller_irq);
+
+ return r;
+}
+
+static int nbio_v7_9_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v7_9_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+const struct amdgpu_ras_block_hw_ops nbio_v7_9_ras_hw_ops = {
+ .query_ras_error_count = nbio_v7_9_query_ras_error_count,
+};
+
+struct amdgpu_nbio_ras nbio_v7_9_ras = {
+ .ras_block = {
+ .ras_comm = {
+ .name = "pcie_bif",
+ .block = AMDGPU_RAS_BLOCK__PCIE_BIF,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ },
+ .hw_ops = &nbio_v7_9_ras_hw_ops,
+ .ras_late_init = amdgpu_nbio_ras_late_init,
+ },
+ .handle_ras_controller_intr_no_bifring = nbio_v7_9_handle_ras_controller_intr_no_bifring,
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_controller_interrupt = nbio_v7_9_init_ras_controller_interrupt,
+ .init_ras_err_event_athub_interrupt = nbio_v7_9_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h
new file mode 100644
index 000000000000..73709771950d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_9_H__
+#define __NBIO_V7_9_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_9_funcs;
+extern struct amdgpu_nbio_ras nbio_v7_9_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index c625c5d8ed89..50e77d9b30af 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -25,6 +25,8 @@
#include <linux/module.h>
#include <linux/pci.h>
+#include <drm/amdgpu_drm.h>
+
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@@ -32,7 +34,6 @@
#include "amdgpu_vce.h"
#include "amdgpu_ucode.h"
#include "amdgpu_psp.h"
-#include "amdgpu_smu.h"
#include "atom.h"
#include "amd_pcie.h"
@@ -57,83 +58,221 @@
#include "jpeg_v2_0.h"
#include "vcn_v3_0.h"
#include "jpeg_v3_0.h"
-#include "dce_virtual.h"
-#include "mes_v10_1.h"
+#include "amdgpu_vkms.h"
#include "mxgpu_nv.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
static const struct amd_ip_funcs nv_common_ip_funcs;
-/*
- * Indirect registers accessor
- */
-static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg)
-{
- unsigned long address, data;
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
+/* Navi */
+static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 0)},
+};
- return amdgpu_device_indirect_rreg(adev, address, data, reg);
-}
+static const struct amdgpu_video_codecs nv_video_codecs_encode = {
+ .codec_count = ARRAY_SIZE(nv_video_codecs_encode_array),
+ .codec_array = nv_video_codecs_encode_array,
+};
-static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
-{
- unsigned long address, data;
+/* Navi1x */
+static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
+static const struct amdgpu_video_codecs nv_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(nv_video_codecs_decode_array),
+ .codec_array = nv_video_codecs_decode_array,
+};
- amdgpu_device_indirect_wreg(adev, address, data, reg, v);
-}
+/* Sienna Cichlid */
+static const struct amdgpu_video_codec_info sc_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+};
-static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
-{
- unsigned long address, data;
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
+static const struct amdgpu_video_codecs sc_video_codecs_encode = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_encode_array),
+ .codec_array = sc_video_codecs_encode_array,
+};
- return amdgpu_device_indirect_rreg64(adev, address, data, reg);
-}
+static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
-static u32 nv_pcie_port_rreg(struct amdgpu_device *adev, u32 reg)
-{
- unsigned long flags, address, data;
- u32 r;
- address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- WREG32(address, reg * 4);
- (void)RREG32(address);
- r = RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
- return r;
-}
+static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn0),
+ .codec_array = sc_video_codecs_decode_array_vcn0,
+};
-static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
-{
- unsigned long address, data;
+static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn1),
+ .codec_array = sc_video_codecs_decode_array_vcn1,
+};
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
+/* SRIOV Sienna Cichlid, not const since data is controlled by host */
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+};
- amdgpu_device_indirect_wreg64(adev, address, data, reg, v);
-}
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
-static void nv_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
-{
- unsigned long flags, address, data;
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = {
+ .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
+ .codec_array = sriov_sc_video_codecs_encode_array,
+};
+
+static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0),
+ .codec_array = sriov_sc_video_codecs_decode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1),
+ .codec_array = sriov_sc_video_codecs_decode_array_vcn1,
+};
+
+/* Beige Goby*/
+static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs bg_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array),
+ .codec_array = bg_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codecs bg_video_codecs_encode = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
- address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+/* Yellow Carp*/
+static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs yc_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array),
+ .codec_array = yc_video_codecs_decode_array,
+};
+
+static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
+ return -EINVAL;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- WREG32(address, reg * 4);
- (void)RREG32(address);
- WREG32(data, v);
- (void)RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 64):
+ case IP_VERSION(3, 0, 192):
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ if (encode)
+ *codecs = &sriov_sc_video_codecs_encode;
+ else
+ *codecs = &sriov_sc_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sriov_sc_video_codecs_encode;
+ else
+ *codecs = &sriov_sc_video_codecs_decode_vcn0;
+ }
+ } else {
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &sc_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &sc_video_codecs_decode_vcn0;
+ }
+ }
+ return 0;
+ case IP_VERSION(3, 0, 16):
+ case IP_VERSION(3, 0, 2):
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &sc_video_codecs_decode_vcn0;
+ return 0;
+ case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &yc_video_codecs_decode;
+ return 0;
+ case IP_VERSION(3, 0, 33):
+ if (encode)
+ *codecs = &bg_video_codecs_encode;
+ else
+ *codecs = &bg_video_codecs_decode;
+ return 0;
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ if (encode)
+ *codecs = &nv_video_codecs_encode;
+ else
+ *codecs = &nv_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
}
static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg)
@@ -184,12 +323,7 @@ void nv_grbm_select(struct amdgpu_device *adev,
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl);
-}
-
-static void nv_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
static bool nv_read_disabled_bios(struct amdgpu_device *adev)
@@ -198,38 +332,6 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev)
return false;
}
-static bool nv_read_bios_from_rom(struct amdgpu_device *adev,
- u8 *bios, u32 length_bytes)
-{
- u32 *dw_ptr;
- u32 i, length_dw;
- u32 rom_index_offset, rom_data_offset;
-
- if (bios == NULL)
- return false;
- if (length_bytes == 0)
- return false;
- /* APU vbios image is part of sbios image */
- if (adev->flags & AMD_IS_APU)
- return false;
-
- dw_ptr = (u32 *)bios;
- length_dw = ALIGN(length_bytes, 4) / 4;
-
- rom_index_offset =
- adev->smuio.funcs->get_rom_index_offset(adev);
- rom_data_offset =
- adev->smuio.funcs->get_rom_data_offset(adev);
-
- /* set rom index to 0 */
- WREG32(rom_index_offset, 0);
- /* read out the rom data */
- for (i = 0; i < length_dw; i++)
- dw_ptr[i] = RREG32(rom_data_offset);
-
- return true;
-}
-
static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
@@ -259,12 +361,12 @@ static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
}
@@ -291,9 +393,10 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
*value = 0;
for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) {
en = &nv_allowed_read_registers[i];
- if ((i == 7 && (adev->sdma.num_instances == 1)) || /* some asics don't have SDMA1 */
- reg_offset !=
- (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset))
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
continue;
*value = nv_get_register_value(adev,
@@ -304,44 +407,6 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
return -EINVAL;
}
-static int nv_asic_mode1_reset(struct amdgpu_device *adev)
-{
- u32 i;
- int ret = 0;
-
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
-
- /* disable BM */
- pci_clear_master(adev->pdev);
-
- amdgpu_device_cache_pci_state(adev->pdev);
-
- if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
- dev_info(adev->dev, "GPU smu mode1 reset\n");
- ret = amdgpu_dpm_mode1_reset(adev);
- } else {
- dev_info(adev->dev, "GPU psp mode1 reset\n");
- ret = psp_gpu_reset(adev);
- }
-
- if (ret)
- dev_err(adev->dev, "GPU mode1 reset failed\n");
- amdgpu_device_load_pci_state(adev->pdev);
-
- /* wait for asic to come out of reset */
- for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio.funcs->get_memsize(adev);
-
- if (memsize != 0xffffffff)
- break;
- udelay(1);
- }
-
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
-
- return ret;
-}
-
static int nv_asic_mode2_reset(struct amdgpu_device *adev)
{
u32 i;
@@ -374,21 +439,9 @@ static int nv_asic_mode2_reset(struct amdgpu_device *adev)
return ret;
}
-static bool nv_asic_supports_baco(struct amdgpu_device *adev)
-{
- struct smu_context *smu = &adev->smu;
-
- if (smu_baco_is_support(smu))
- return true;
- else
- return false;
-}
-
static enum amd_reset_method
nv_asic_reset_method(struct amdgpu_device *adev)
{
- struct smu_context *smu = &adev->smu;
-
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO ||
@@ -399,15 +452,21 @@ nv_asic_reset_method(struct amdgpu_device *adev)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
- switch (adev->asic_type) {
- case CHIP_VANGOGH:
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
return AMD_RESET_METHOD_MODE2;
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
return AMD_RESET_METHOD_MODE1;
default:
- if (smu_baco_is_support(smu))
+ if (amdgpu_dpm_is_baco_supported(adev))
return AMD_RESET_METHOD_BACO;
else
return AMD_RESET_METHOD_MODE1;
@@ -417,11 +476,6 @@ nv_asic_reset_method(struct amdgpu_device *adev)
static int nv_asic_reset(struct amdgpu_device *adev)
{
int ret = 0;
- struct smu_context *smu = &adev->smu;
-
- /* skip reset on vangogh for now */
- if (adev->asic_type == CHIP_VANGOGH)
- return 0;
switch (nv_asic_reset_method(adev)) {
case AMD_RESET_METHOD_PCI:
@@ -430,13 +484,7 @@ static int nv_asic_reset(struct amdgpu_device *adev)
break;
case AMD_RESET_METHOD_BACO:
dev_info(adev->dev, "BACO reset\n");
-
- ret = smu_baco_enter(smu);
- if (ret)
- return ret;
- ret = smu_baco_exit(smu);
- if (ret)
- return ret;
+ ret = amdgpu_dpm_baco_reset(adev);
break;
case AMD_RESET_METHOD_MODE2:
dev_info(adev->dev, "MODE2 reset\n");
@@ -444,7 +492,7 @@ static int nv_asic_reset(struct amdgpu_device *adev)
break;
default:
dev_info(adev->dev, "MODE1 reset\n");
- ret = nv_asic_mode1_reset(adev);
+ ret = amdgpu_device_mode1_reset(adev);
break;
}
@@ -463,42 +511,17 @@ static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
return 0;
}
-static void nv_pcie_gen3_enable(struct amdgpu_device *adev)
-{
- if (pci_is_root_bus(adev->pdev->bus))
- return;
-
- if (amdgpu_pcie_gen2 == 0)
- return;
-
- if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
- CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
- return;
-
- /* todo */
-}
-
static void nv_program_aspm(struct amdgpu_device *adev)
{
- if (amdgpu_aspm != 1)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if ((adev->asic_type >= CHIP_SIENNA_CICHLID) &&
- !(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
-static void nv_enable_doorbell_aperture(struct amdgpu_device *adev,
- bool enable)
-{
- adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
-}
-
-static const struct amdgpu_ip_block_version nv_common_ip_block =
-{
+const struct amdgpu_ip_block_version nv_common_ip_block = {
.type = AMD_IP_BLOCK_TYPE_COMMON,
.major = 1,
.minor = 0,
@@ -506,237 +529,11 @@ static const struct amdgpu_ip_block_version nv_common_ip_block =
.funcs = &nv_common_ip_funcs,
};
-static int nv_reg_base_init(struct amdgpu_device *adev)
-{
- int r;
-
- if (amdgpu_discovery) {
- r = amdgpu_discovery_reg_base_init(adev);
- if (r) {
- DRM_WARN("failed to init reg base from ip discovery table, "
- "fallback to legacy init method\n");
- goto legacy_init;
- }
-
- return 0;
- }
-
-legacy_init:
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- navi10_reg_base_init(adev);
- break;
- case CHIP_NAVI14:
- navi14_reg_base_init(adev);
- break;
- case CHIP_NAVI12:
- navi12_reg_base_init(adev);
- break;
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- sienna_cichlid_reg_base_init(adev);
- break;
- case CHIP_VANGOGH:
- vangogh_reg_base_init(adev);
- break;
- case CHIP_DIMGREY_CAVEFISH:
- dimgrey_cavefish_reg_base_init(adev);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
void nv_set_virt_ops(struct amdgpu_device *adev)
{
adev->virt.ops = &xgpu_nv_virt_ops;
}
-static bool nv_is_headless_sku(struct pci_dev *pdev)
-{
- if ((pdev->device == 0x731E &&
- (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
- (pdev->device == 0x7340 && pdev->revision == 0xC9) ||
- (pdev->device == 0x7360 && pdev->revision == 0xC7))
- return true;
- return false;
-}
-
-int nv_set_ip_blocks(struct amdgpu_device *adev)
-{
- int r;
-
- if (adev->flags & AMD_IS_APU) {
- adev->nbio.funcs = &nbio_v7_2_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg;
- } else {
- adev->nbio.funcs = &nbio_v2_3_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
- }
- adev->hdp.funcs = &hdp_v5_0_funcs;
-
- if (adev->asic_type >= CHIP_SIENNA_CICHLID)
- adev->smuio.funcs = &smuio_v11_0_6_funcs;
- else
- adev->smuio.funcs = &smuio_v11_0_funcs;
-
- if (adev->asic_type == CHIP_SIENNA_CICHLID)
- adev->gmc.xgmi.supported = true;
-
- /* Set IP register base before any HW register access */
- r = nv_reg_base_init(adev);
- if (r)
- return r;
-
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
- !amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
- !amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- if (!nv_is_headless_sku(adev->pdev))
- amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
- amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
- if (adev->enable_mes)
- amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
- break;
- case CHIP_NAVI12:
- amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
- !amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- if (!nv_is_headless_sku(adev->pdev))
- amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
- break;
- case CHIP_SIENNA_CICHLID:
- amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
- is_support_sw_smu(adev))
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
- amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
-
- if (adev->enable_mes)
- amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
- break;
- case CHIP_NAVY_FLOUNDER:
- amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
- is_support_sw_smu(adev))
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
- amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
- amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
- is_support_sw_smu(adev))
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- break;
- case CHIP_VANGOGH:
- amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
- amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
- amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
- break;
- case CHIP_DIMGREY_CAVEFISH:
- amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
- is_support_sw_smu(adev))
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
- amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
- amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static uint32_t nv_get_rev_id(struct amdgpu_device *adev)
-{
- return adev->nbio.funcs->get_rev_id(adev);
-}
-
static bool nv_need_full_reset(struct amdgpu_device *adev)
{
return true;
@@ -759,16 +556,6 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
-static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev)
-{
-
- /* TODO
- * dummy implement for pcie_replay_count sysfs interface
- * */
-
- return 0;
-}
-
static void nv_init_doorbell_index(struct amdgpu_device *adev)
{
adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
@@ -784,7 +571,12 @@ static void nv_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
- adev->doorbell_index.mes_ring = AMDGPU_NAVI10_DOORBELL_MES_RING;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
adev->doorbell_index.sdma_engine[2] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2;
@@ -809,33 +601,26 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev,
bool enter)
{
if (enter)
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
else
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
if (adev->gfx.funcs->update_perfmon_mgcg)
adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
- /*
- * The ASPM function is not fully enabled and verified on
- * Navi yet. Temporarily skip this until ASPM enabled.
- */
- if ((adev->asic_type >= CHIP_SIENNA_CICHLID) &&
- !(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->enable_aspm))
+ if (adev->nbio.funcs->enable_aspm &&
+ amdgpu_device_should_use_aspm(adev))
adev->nbio.funcs->enable_aspm(adev, !enter);
return 0;
}
-static const struct amdgpu_asic_funcs nv_asic_funcs =
-{
+static const struct amdgpu_asic_funcs nv_asic_funcs = {
.read_disabled_bios = &nv_read_disabled_bios,
- .read_bios_from_rom = &nv_read_bios_from_rom,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
.read_register = &nv_read_register,
.reset = &nv_asic_reset,
.reset_method = &nv_asic_reset_method,
- .set_vga_state = &nv_vga_set_state,
.get_xclk = &nv_get_xclk,
.set_uvd_clocks = &nv_set_uvd_clocks,
.set_vce_clocks = &nv_set_vce_clocks,
@@ -843,27 +628,26 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.init_doorbell_index = &nv_init_doorbell_index,
.need_full_reset = &nv_need_full_reset,
.need_reset_on_init = &nv_need_reset_on_init,
- .get_pcie_replay_count = &nv_get_pcie_replay_count,
- .supports_baco = &nv_asic_supports_baco,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
.pre_asic_init = &nv_pre_asic_init,
.update_umd_stable_pstate = &nv_update_umd_stable_pstate,
+ .query_video_codecs = &nv_query_video_codecs,
};
-static int nv_common_early_init(void *handle)
+static int nv_common_early_init(struct amdgpu_ip_block *ip_block)
{
-#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
- adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
- adev->pcie_rreg = &nv_pcie_rreg;
- adev->pcie_wreg = &nv_pcie_wreg;
- adev->pcie_rreg64 = &nv_pcie_rreg64;
- adev->pcie_wreg64 = &nv_pcie_wreg64;
- adev->pciep_rreg = &nv_pcie_port_rreg;
- adev->pciep_wreg = &nv_pcie_port_wreg;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
/* TODO: will add them during VCN v2 implementation */
adev->uvd_ctx_rreg = NULL;
@@ -874,10 +658,13 @@ static int nv_common_early_init(void *handle)
adev->asic_funcs = &nv_asic_funcs;
- adev->rev_id = nv_get_rev_id(adev);
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
adev->external_rev_id = 0xff;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
+ /* TODO: split the GC and PG flags based on the relevant IP version for which
+ * they are relevant.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_IH_CG |
@@ -899,7 +686,7 @@ static int nv_common_early_init(void *handle)
AMD_PG_SUPPORT_ATHUB;
adev->external_rev_id = adev->rev_id + 0x1;
break;
- case CHIP_NAVI14:
+ case IP_VERSION(10, 1, 1):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_IH_CG |
@@ -920,7 +707,7 @@ static int nv_common_early_init(void *handle)
AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 20;
break;
- case CHIP_NAVI12:
+ case IP_VERSION(10, 1, 2):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CGCG |
@@ -949,9 +736,10 @@ static int nv_common_early_init(void *handle)
adev->rev_id = 0;
adev->external_rev_id = adev->rev_id + 0xa;
break;
- case CHIP_SIENNA_CICHLID:
+ case IP_VERSION(10, 3, 0):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_VCN_MGCG |
@@ -972,9 +760,10 @@ static int nv_common_early_init(void *handle)
}
adev->external_rev_id = adev->rev_id + 0x28;
break;
- case CHIP_NAVY_FLOUNDER:
+ case IP_VERSION(10, 3, 2):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
@@ -990,9 +779,7 @@ static int nv_common_early_init(void *handle)
AMD_PG_SUPPORT_MMHUB;
adev->external_rev_id = adev->rev_id + 0x32;
break;
-
- case CHIP_VANGOGH:
- adev->apu_flags |= AMD_APU_IS_VANGOGH;
+ case IP_VERSION(10, 3, 1):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
@@ -1005,6 +792,8 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_GFX_FGCG |
AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_VCN |
@@ -1013,9 +802,10 @@ static int nv_common_early_init(void *handle)
if (adev->apu_flags & AMD_APU_IS_VANGOGH)
adev->external_rev_id = adev->rev_id + 0x01;
break;
- case CHIP_DIMGREY_CAVEFISH:
+ case IP_VERSION(10, 3, 4):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
@@ -1031,11 +821,122 @@ static int nv_common_early_init(void *handle)
AMD_PG_SUPPORT_MMHUB;
adev->external_rev_id = adev->rev_id + 0x3c;
break;
+ case IP_VERSION(10, 3, 5):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x46;
+ break;
+ case IP_VERSION(10, 3, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ if (adev->pdev->device == 0x1681)
+ adev->external_rev_id = 0x20;
+ else
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x82;
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
}
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+ adev->pg_flags &= ~(AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG);
+
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_init_setting(adev);
xgpu_nv_mailbox_set_irq_funcs(adev);
@@ -1044,19 +945,38 @@ static int nv_common_early_init(void *handle)
return 0;
}
-static int nv_common_late_init(void *handle)
+static int nv_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_sc_video_codecs_encode_array,
+ ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
+ sriov_sc_video_codecs_decode_array_vcn1,
+ ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1));
+ } else {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_sc_video_codecs_encode_array,
+ ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
+ sriov_sc_video_codecs_decode_array_vcn0,
+ ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0));
+ }
+ }
+
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
return 0;
}
-static int nv_common_sw_init(void *handle)
+static int nv_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_add_irq_id(adev);
@@ -1064,17 +984,16 @@ static int nv_common_sw_init(void *handle)
return 0;
}
-static int nv_common_sw_fini(void *handle)
+static int nv_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
+ struct amdgpu_device *adev = ip_block->adev;
-static int nv_common_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->nbio.funcs->apply_lc_spc_mode_wa)
+ adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
+
+ if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa)
+ adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev);
- /* enable pcie gen2/3 link */
- nv_pcie_gen3_enable(adev);
/* enable aspm */
nv_program_aspm(adev);
/* setup nbio registers */
@@ -1083,68 +1002,60 @@ static int nv_common_hw_init(void *handle)
* for the purpose of expose those registers
* to process space
*/
- if (adev->nbio.funcs->remap_hdp_registers)
+ if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
- nv_enable_doorbell_aperture(adev, true);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
return 0;
}
-static int nv_common_hw_fini(void *handle)
+static int nv_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- /* disable the doorbell aperture */
- nv_enable_doorbell_aperture(adev, false);
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because nv_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
return 0;
}
-static int nv_common_suspend(void *handle)
+static int nv_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return nv_common_hw_fini(adev);
+ return nv_common_hw_fini(ip_block);
}
-static int nv_common_resume(void *handle)
+static int nv_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return nv_common_hw_init(adev);
+ return nv_common_hw_init(ip_block);
}
-static bool nv_common_is_idle(void *handle)
+static bool nv_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int nv_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int nv_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int nv_common_set_clockgating_state(void *handle,
+static int nv_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 3, 1):
+ case IP_VERSION(2, 3, 2):
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
+ case IP_VERSION(3, 3, 3):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
@@ -1160,16 +1071,16 @@ static int nv_common_set_clockgating_state(void *handle,
return 0;
}
-static int nv_common_set_powergating_state(void *handle,
+static int nv_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* TODO */
return 0;
}
-static void nv_common_get_clockgating_state(void *handle, u32 *flags)
+static void nv_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
*flags = 0;
@@ -1179,8 +1090,6 @@ static void nv_common_get_clockgating_state(void *handle, u32 *flags)
adev->hdp.funcs->get_clock_gating_state(adev, flags);
adev->smuio.funcs->get_clock_gating_state(adev, flags);
-
- return;
}
static const struct amd_ip_funcs nv_common_ip_funcs = {
@@ -1188,14 +1097,11 @@ static const struct amd_ip_funcs nv_common_ip_funcs = {
.early_init = nv_common_early_init,
.late_init = nv_common_late_init,
.sw_init = nv_common_sw_init,
- .sw_fini = nv_common_sw_fini,
.hw_init = nv_common_hw_init,
.hw_fini = nv_common_hw_fini,
.suspend = nv_common_suspend,
.resume = nv_common_resume,
.is_idle = nv_common_is_idle,
- .wait_for_idle = nv_common_wait_for_idle,
- .soft_reset = nv_common_soft_reset,
.set_clockgating_state = nv_common_set_clockgating_state,
.set_powergating_state = nv_common_set_powergating_state,
.get_clockgating_state = nv_common_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
index 515d67bf249f..8f4817404f10 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -26,14 +26,11 @@
#include "nbio_v2_3.h"
+extern const struct amdgpu_ip_block_version nv_common_ip_block;
+
void nv_grbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
void nv_set_virt_ops(struct amdgpu_device *adev);
-int nv_set_ip_blocks(struct amdgpu_device *adev);
-int navi10_reg_base_init(struct amdgpu_device *adev);
-int navi14_reg_base_init(struct amdgpu_device *adev);
-int navi12_reg_base_init(struct amdgpu_device *adev);
-int sienna_cichlid_reg_base_init(struct amdgpu_device *adev);
-void vangogh_reg_base_init(struct amdgpu_device *adev);
-int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev);
+int cyan_skillfish_reg_base_init(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index fd6b58243b03..56f1bfac0b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -64,6 +64,24 @@
#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
+#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2
+#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
@@ -105,6 +123,38 @@
* 1 - pfp
* 2 - ce
*/
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_WRITE_DATA__TEMPORAL__RT 0
+#define PACKET3_WRITE_DATA__TEMPORAL__NT 1
+#define PACKET3_WRITE_DATA__TEMPORAL__HT 2
+#define PACKET3_WRITE_DATA__TEMPORAL__LU 3
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_MEM_SEMAPHORE 0x39
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -135,6 +185,42 @@
/* 0 - me
* 1 - pfp
*/
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -144,8 +230,94 @@
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23)
+#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_CP_DMA 0x41
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_SURFACE_SYNC 0x43
@@ -160,6 +332,23 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x))
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3
#define PACKET3_EVENT_WRITE_EOP 0x47
#define PACKET3_EVENT_WRITE_EOS 0x48
#define PACKET3_RELEASE_MEM 0x49
@@ -304,6 +493,12 @@
* 2: REVERSE
*/
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0)
#define PACKET3_REWIND 0x59
#define PACKET3_INTERRUPT 0x5A
#define PACKET3_GEN_PDEPTE 0x5B
@@ -330,11 +525,17 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
+#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0
+#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
#define PACKET3_FORWARD_HEADER 0x7C
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
@@ -369,6 +570,7 @@
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_AQL_PACKET 0x99
#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
#define PACKET3_SET_SH_REG_INDEX 0x9B
@@ -463,5 +665,14 @@
#define PACKET3_RUN_LIST 0xA5
#define PACKET3_MAP_PROCESS_VM 0xA6
+#define PACKET3_RUN_CLEANER_SHADER 0xD2
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
+/* GFX11 */
+#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
+# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
+# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 3ba7bdfde65d..73f87131a7e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -97,11 +97,32 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
- GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */
GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */
/* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
+ GFX_CMD_ID_BOOT_CFG = 0x00000022, /* Boot Config */
+ GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x00000027, /* Configure spatial partitioning mode */
+ /*IDs of performance monitoring/profiling*/
+ GFX_CMD_ID_CONFIG_SQ_PERFMON = 0x00000046, /* Config CGTT_SQ_CLK_CTRL */
+ /* Dynamic memory partitioninig (NPS mode change)*/
+ GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */
+ GFX_CMD_ID_FB_FW_RESERV_ADDR = 0x00000050, /* Query FW reservation addr */
+ GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR = 0x00000051, /* Query FW reservation extended addr */
+};
+
+/* PSP boot config sub-commands */
+enum psp_gfx_boot_config_cmd
+{
+ BOOTCFG_CMD_SET = 1, /* Set boot configuration settings */
+ BOOTCFG_CMD_GET = 2, /* Get boot configuration settings */
+ BOOTCFG_CMD_INVALIDATE = 3 /* Reset current boot configuration settings to VBIOS defaults */
+};
+
+/* PSP boot config bitmask values */
+enum psp_gfx_boot_config
+{
+ BOOT_CONFIG_GECC = 0x1,
};
/* Command to load Trusted Application binary into PSP OS. */
@@ -170,10 +191,19 @@ struct psp_gfx_cmd_setup_tmr
uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */
uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */
+ union {
+ struct {
+ uint32_t sriov_enabled:1; /* whether the device runs under SR-IOV*/
+ uint32_t virt_phy_addr:1; /* driver passes both virtual and physical address to PSP*/
+ uint32_t reserved:30;
+ } bitfield;
+ uint32_t tmr_flags;
+ };
+ uint32_t system_phy_addr_lo; /* bits [31:0] of system physical address of TMR buffer (must be 4 KB aligned) */
+ uint32_t system_phy_addr_hi; /* bits [63:32] of system physical address of TMR buffer */
};
-
/* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */
enum psp_gfx_fw_type {
GFX_FW_TYPE_NONE = 0, /* */
@@ -235,6 +265,45 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */
GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */
GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */
+ GFX_FW_TYPE_CAP = 62, /* CAP_FW */
+ GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */
+ GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */
+ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */
+ GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */
+ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */
+ GFX_FW_TYPE_LSDMA = 70, /* LSDMA FW SOC21 */
+ GFX_FW_TYPE_SDMA_UCODE_TH0 = 71, /* SDMA Thread 0/CTX SOC21 */
+ GFX_FW_TYPE_SDMA_UCODE_TH1 = 72, /* SDMA Thread 1/CTL SOC21 */
+ GFX_FW_TYPE_PPTABLE = 73, /* PPTABLE SOC21 */
+ GFX_FW_TYPE_DISCRETE_USB4 = 74, /* dUSB4 FW SOC21 */
+ GFX_FW_TYPE_TA = 75, /* SRIOV TA FW UUID SOC21 */
+ GFX_FW_TYPE_RS64_MES = 76, /* RS64 MES ucode SOC21 */
+ GFX_FW_TYPE_RS64_MES_STACK = 77, /* RS64 MES stack ucode SOC21 */
+ GFX_FW_TYPE_RS64_KIQ = 78, /* RS64 KIQ ucode SOC21 */
+ GFX_FW_TYPE_RS64_KIQ_STACK = 79, /* RS64 KIQ Heap stack SOC21 */
+ GFX_FW_TYPE_ISP_DATA = 80, /* ISP DATA SOC21 */
+ GFX_FW_TYPE_CP_MES_KIQ = 81, /* MES KIQ ucode SOC21 */
+ GFX_FW_TYPE_MES_KIQ_STACK = 82, /* MES KIQ stack SOC21 */
+ GFX_FW_TYPE_UMSCH_DATA = 83, /* User Mode Scheduler Data SOC21 */
+ GFX_FW_TYPE_UMSCH_UCODE = 84, /* User Mode Scheduler Ucode SOC21 */
+ GFX_FW_TYPE_UMSCH_CMD_BUFFER = 85, /* User Mode Scheduler Command Buffer SOC21 */
+ GFX_FW_TYPE_USB_DP_COMBO_PHY = 86, /* USB-Display port Combo SOC21 */
+ GFX_FW_TYPE_RS64_PFP = 87, /* RS64 PFP SOC21 */
+ GFX_FW_TYPE_RS64_ME = 88, /* RS64 ME SOC21 */
+ GFX_FW_TYPE_RS64_MEC = 89, /* RS64 MEC SOC21 */
+ GFX_FW_TYPE_RS64_PFP_P0_STACK = 90, /* RS64 PFP stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_PFP_P1_STACK = 91, /* RS64 PFP stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_ME_P0_STACK = 92, /* RS64 ME stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_ME_P1_STACK = 93, /* RS64 ME stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P0_STACK = 94, /* RS64 MEC stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P1_STACK = 95, /* RS64 MEC stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P2_STACK = 96, /* RS64 MEC stack P2 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P3_STACK = 97, /* RS64 MEC stack P3 SOC21 */
+ GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */
+ GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */
+ GFX_FW_TYPE_VPE = 102,
+ GFX_FW_TYPE_JPEG_RAM = 128, /**< JPEG Command buffer */
+ GFX_FW_TYPE_P2S_TABLE = 129,
GFX_FW_TYPE_MAX
};
@@ -272,6 +341,36 @@ struct psp_gfx_cmd_load_toc
uint32_t toc_size; /* FW buffer size in bytes */
};
+/* Dynamic boot configuration */
+struct psp_gfx_cmd_boot_cfg
+{
+ uint32_t timestamp; /* calendar time as number of seconds */
+ enum psp_gfx_boot_config_cmd sub_cmd; /* sub-command indicating how to process command data */
+ uint32_t boot_config; /* dynamic boot configuration bitmask */
+ uint32_t boot_config_valid; /* dynamic boot configuration valid bits bitmask */
+};
+
+struct psp_gfx_cmd_sriov_spatial_part {
+ uint32_t mode;
+ uint32_t override_ips;
+ uint32_t override_xcds_avail;
+ uint32_t override_this_aid;
+};
+
+/*Structure for sq performance monitoring/profiling enable/disable*/
+struct psp_gfx_cmd_config_sq_perfmon {
+ uint32_t gfx_xcp_mask;
+ uint8_t core_override;
+ uint8_t reg_override;
+ uint8_t perfmon_override;
+ uint8_t reserved[5];
+};
+
+struct psp_gfx_cmd_fb_memory_part {
+ uint32_t mode; /* requested NPS mode */
+ uint32_t resvd;
+};
+
/* All GFX ring buffer commands. */
union psp_gfx_commands
{
@@ -284,6 +383,10 @@ union psp_gfx_commands
struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog;
struct psp_gfx_cmd_setup_tmr cmd_setup_vmr;
struct psp_gfx_cmd_load_toc cmd_load_toc;
+ struct psp_gfx_cmd_boot_cfg boot_cfg;
+ struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part;
+ struct psp_gfx_cmd_config_sq_perfmon config_sq_perfmon;
+ struct psp_gfx_cmd_fb_memory_part cmd_memory_part;
};
struct psp_gfx_uresp_reserved
@@ -298,11 +401,24 @@ struct psp_gfx_uresp_fwar_db_info
uint32_t fwar_db_addr_hi;
};
+/* Command-specific response for boot config. */
+struct psp_gfx_uresp_bootcfg {
+ uint32_t boot_cfg; /* boot config data */
+};
+
+/* Command-specific response for fw reserve info */
+struct psp_gfx_uresp_fw_reserve_info {
+ uint32_t reserve_base_address_hi;
+ uint32_t reserve_base_address_lo;
+ uint32_t reserve_size;
+};
+
/* Union of command-specific responses for GPCOM ring. */
-union psp_gfx_uresp
-{
- struct psp_gfx_uresp_reserved reserved;
- struct psp_gfx_uresp_fwar_db_info fwar_db_info;
+union psp_gfx_uresp {
+ struct psp_gfx_uresp_reserved reserved;
+ struct psp_gfx_uresp_bootcfg boot_cfg;
+ struct psp_gfx_uresp_fwar_db_info fwar_db_info;
+ struct psp_gfx_uresp_fw_reserve_info fw_reserve_info;
};
/* Structure of GFX Response buffer.
@@ -378,8 +494,9 @@ struct psp_gfx_rb_frame
#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
enum tee_error_code {
- TEE_SUCCESS = 0x00000000,
- TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
+ TEE_SUCCESS = 0x00000000,
+ TEE_ERROR_CANCEL = 0xFFFF0002,
+ TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
};
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 4b1cc5e9ee92..3584b8c18fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -47,111 +47,26 @@ MODULE_FIRMWARE("amdgpu/raven_ta.bin");
static int psp_v10_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
- char fw_name[30];
+ char ucode_prefix[30];
int err = 0;
- const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- chip_name = "raven2";
- else if (adev->apu_flags & AMD_APU_IS_PICASSO)
- chip_name = "picasso";
- else
- chip_name = "raven";
- break;
- default: BUG();
- }
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
- goto out;
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v10.0: Failed to load firmware \"%s\"\n",
- fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out2;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)
- adev->psp.ta_fw->data;
- adev->psp.ta_hdcp_ucode_version =
- le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
- adev->psp.ta_hdcp_ucode_size =
- le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
- adev->psp.ta_hdcp_start_addr =
- (uint8_t *)ta_hdr +
- le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
-
- adev->psp.ta_dtm_ucode_version =
- le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
- adev->psp.ta_dtm_ucode_size =
- le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
- adev->psp.ta_dtm_start_addr =
- (uint8_t *)adev->psp.ta_hdcp_start_addr +
- le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
-
- adev->psp.ta_securedisplay_ucode_version =
- le32_to_cpu(ta_hdr->ta_securedisplay_ucode_version);
- adev->psp.ta_securedisplay_ucode_size =
- le32_to_cpu(ta_hdr->ta_securedisplay_size_bytes);
- adev->psp.ta_securedisplay_start_addr =
- (uint8_t *)adev->psp.ta_hdcp_start_addr +
- le32_to_cpu(ta_hdr->ta_securedisplay_offset_bytes);
-
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+ return err;
+
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) &&
+ (adev->pdev->revision == 0xa1) &&
+ (psp->securedisplay_context.context.bin_desc.fw_version >=
+ 0x27000008)) {
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
}
-
- return 0;
-
-out2:
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
-out:
- if (err) {
- dev_err(adev->dev,
- "psp v10.0: Failed to load firmware \"%s\"\n",
- fw_name);
- }
-
return err;
}
-static int psp_v10_0_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static int psp_v10_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -179,7 +94,7 @@ static int psp_v10_0_ring_create(struct psp_context *psp,
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -200,7 +115,7 @@ static int psp_v10_0_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -245,7 +160,6 @@ static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
static const struct psp_funcs psp_v10_0_funcs = {
.init_microcode = psp_v10_0_init_microcode,
- .ring_init = psp_v10_0_ring_init,
.ring_create = psp_v10_0_ring_create,
.ring_stop = psp_v10_0_ring_stop,
.ring_destroy = psp_v10_0_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index c325d6f53a71..a9be7a505026 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -23,6 +23,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
@@ -52,17 +53,21 @@ MODULE_FIRMWARE("amdgpu/navi14_ta.bin");
MODULE_FIRMWARE("amdgpu/navi12_sos.bin");
MODULE_FIRMWARE("amdgpu/navi12_asd.bin");
MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi12_cap.bin");
MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_ta.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_cap.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_ta.bin");
MODULE_FIRMWARE("amdgpu/vangogh_asd.bin");
MODULE_FIRMWARE("amdgpu/vangogh_toc.bin");
MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sos.bin");
MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ta.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_sos.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_ta.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
@@ -77,164 +82,103 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
+/* Read USB-PD from LFB */
+#define GFX_CMD_USB_PD_USE_LFB 0x480
+
static int psp_v11_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
- char fw_name[PSP_FW_NAME_LEN];
+ char ucode_prefix[30];
int err = 0;
- const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- chip_name = "vega20";
- break;
- case CHIP_NAVI10:
- chip_name = "navi10";
- break;
- case CHIP_NAVI14:
- chip_name = "navi14";
- break;
- case CHIP_NAVI12:
- chip_name = "navi12";
- break;
- case CHIP_ARCTURUS:
- chip_name = "arcturus";
- break;
- case CHIP_SIENNA_CICHLID:
- chip_name = "sienna_cichlid";
- break;
- case CHIP_NAVY_FLOUNDER:
- chip_name = "navy_flounder";
- break;
- case CHIP_VANGOGH:
- chip_name = "vangogh";
- break;
- case CHIP_DIMGREY_CAVEFISH:
- chip_name = "dimgrey_cavefish";
- break;
- default:
- BUG();
- }
-
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- err = psp_init_sos_microcode(psp, chip_name);
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 4):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out2;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
- adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
- adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
- adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
- le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
- adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version);
- adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes);
- adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr +
- le32_to_cpu(ta_hdr->ta_ras_offset_bytes);
- }
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
break;
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- err = psp_init_sos_microcode(psp, chip_name);
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
- if (amdgpu_sriov_vf(adev))
- break;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out2;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
- adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
- adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
- adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr +
- le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
-
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
-
- adev->psp.ta_dtm_ucode_version = le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
- adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
- adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr +
- le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
- }
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
break;
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- err = psp_init_sos_microcode(psp, chip_name);
- if (err)
- return err;
- err = psp_init_ta_microcode(psp, chip_name);
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
break;
- case CHIP_VANGOGH:
- err = psp_init_asd_microcode(psp, chip_name);
- if (err)
- return err;
- err = psp_init_toc_microcode(psp, chip_name);
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
+ err = psp_init_toc_microcode(psp, ucode_prefix);
break;
default:
BUG();
}
- return 0;
-
-out2:
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
return err;
}
-static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
+static int psp_v11_wait_for_tos_unload(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg1, sol_reg2;
+ int retry_loop;
+ /* Wait for the TOS to be unloaded */
+ for (retry_loop = 0; retry_loop < 20; retry_loop++) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ usleep_range(1000, 2000);
+ sol_reg2 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg1 == sol_reg2)
+ return 0;
+ }
+ dev_err(adev->dev, "TOS unload failed, C2PMSG_33: %x C2PMSG_81: %x",
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_33),
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81));
+
+ return -ETIME;
+}
+
+static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
int ret;
int retry_loop;
- for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* For a reset done at the end of S3, only wait for TOS to be unloaded */
+ if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev))
+ return psp_v11_wait_for_tos_unload(psp);
+
+ for (retry_loop = 0; retry_loop < 20; retry_loop++) {
/* Wait for bootloader to signify that is
ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp,
- SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000,
- 0x80000000,
- false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x8000FFFF, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
return 0;
@@ -253,13 +197,15 @@ static bool psp_v11_0_is_sos_alive(struct psp_context *psp)
return sol_reg != 0x0;
}
-static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
+static int psp_v11_0_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
{
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- /* Check tOS sign of life register to confirm sys driver and sOS
+ /* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
if (psp_v11_0_is_sos_alive(psp))
@@ -269,15 +215,13 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
if (ret)
return ret;
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
-
- /* Copy PSP KDB binary to memory */
- memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size);
+ /* Copy PSP System Driver binary to memory */
+ psp_copy_fw(psp, bin_desc->start_addr, bin_desc->size_bytes);
- /* Provide the PSP KDB to bootloader */
+ /* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
- psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE;
+ psp_gfxdrv_command_reg = bl_cmd;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
@@ -286,73 +230,19 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
return ret;
}
-static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
+static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
{
- int ret;
- uint32_t psp_gfxdrv_command_reg = 0;
- struct amdgpu_device *adev = psp->adev;
-
- /* Check tOS sign of life register to confirm sys driver and sOS
- * are already been loaded.
- */
- if (psp_v11_0_is_sos_alive(psp))
- return 0;
-
- ret = psp_v11_0_wait_for_bootloader(psp);
- if (ret)
- return ret;
-
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
-
- /* Copy PSP SPL binary to memory */
- memcpy(psp->fw_pri_buf, psp->spl_start_addr, psp->spl_bin_size);
-
- /* Provide the PSP SPL to bootloader */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
- (uint32_t)(psp->fw_pri_mc_addr >> 20));
- psp_gfxdrv_command_reg = PSP_BL__LOAD_TOS_SPL_TABLE;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
- psp_gfxdrv_command_reg);
-
- ret = psp_v11_0_wait_for_bootloader(psp);
+ return psp_v11_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
- return ret;
+static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v11_0_bootloader_load_component(psp, &psp->spl, PSP_BL__LOAD_TOS_SPL_TABLE);
}
static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
{
- int ret;
- uint32_t psp_gfxdrv_command_reg = 0;
- struct amdgpu_device *adev = psp->adev;
-
- /* Check sOS sign of life register to confirm sys driver and sOS
- * are already been loaded.
- */
- if (psp_v11_0_is_sos_alive(psp))
- return 0;
-
- ret = psp_v11_0_wait_for_bootloader(psp);
- if (ret)
- return ret;
-
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
-
- /* Copy PSP System Driver binary to memory */
- memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
-
- /* Provide the sys driver to bootloader */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
- (uint32_t)(psp->fw_pri_mc_addr >> 20));
- psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
- psp_gfxdrv_command_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- ret = psp_v11_0_wait_for_bootloader(psp);
-
- return ret;
+ return psp_v11_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
}
static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
@@ -371,10 +261,8 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
if (ret)
return ret;
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
-
/* Copy Secure OS binary to PSP memory */
- memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
+ psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes);
/* Provide the PSP secure OS to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
@@ -386,38 +274,12 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
-static int psp_v11_0_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static int psp_v11_0_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -437,11 +299,13 @@ static int psp_v11_0_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -455,6 +319,7 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
struct amdgpu_device *adev = psp->adev;
if (amdgpu_sriov_vf(adev)) {
+ ring->ring_wptr = 0;
ret = psp_v11_0_ring_stop(psp, ring_type);
if (ret) {
DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n");
@@ -476,13 +341,15 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Wait for sOS ready for ring creation */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_ERROR("Failed to wait for sOS ready for ring creation\n");
return ret;
@@ -506,8 +373,9 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -540,7 +408,8 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG,
+ MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -552,17 +421,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
msleep(500);
- offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
-
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
-
- if (ret) {
- DRM_INFO("psp mode 1 reset failed!\n");
- return -EINVAL;
- }
-
- DRM_INFO("psp mode1 reset succeed \n");
-
return 0;
}
@@ -580,8 +438,9 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
for (i = 0; i < max_wait; i++) {
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
break;
}
@@ -598,7 +457,7 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
}
/*
- * save and restore proces
+ * save and restore process
*/
static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
{
@@ -608,7 +467,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
uint32_t p2c_header[4];
uint32_t sz;
void *buf;
- int ret;
+ int ret, idx;
if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
DRM_DEBUG("Memory training is not supported.\n");
@@ -661,12 +520,12 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
/*
- * Long traing will encroach certain mount of bottom VRAM,
- * saving the content of this bottom VRAM to system memory
- * before training, and restoring it after training to avoid
+ * Long training will encroach a certain amount on the bottom of VRAM;
+ * save the content from the bottom of VRAM to system memory
+ * before training, and restore it after training to avoid
* VRAM corruption.
*/
- sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+ sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
@@ -681,17 +540,24 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
return -ENOMEM;
}
- memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
- ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
- if (ret) {
- DRM_ERROR("Send long training msg failed.\n");
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ drm_dev_exit(idx);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
- return ret;
+ drm_dev_exit(idx);
+ } else {
+ vfree(buf);
+ return -ENODEV;
}
-
- memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
- adev->hdp.funcs->flush_hdp(adev, NULL);
- vfree(buf);
}
if (ops & PSP_MEM_TRAIN_SAVE) {
@@ -720,7 +586,7 @@ static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp)
struct amdgpu_device *adev = psp->adev;
if (amdgpu_sriov_vf(adev))
- data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ data = psp->km_ring.ring_wptr;
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
@@ -734,48 +600,31 @@ static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
if (amdgpu_sriov_vf(adev)) {
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+ psp->km_ring.ring_wptr = value;
} else
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
}
-static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, dma_addr_t dma_addr)
+static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
{
struct amdgpu_device *adev = psp->adev;
uint32_t reg_status;
int ret, i = 0;
- /* Write lower 32-bit address of the PD Controller FW */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, lower_32_bits(dma_addr));
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
- if (ret)
- return ret;
-
- /* Fireup interrupt so PSP can pick up the lower address */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, 0x800000);
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
- if (ret)
- return ret;
-
- reg_status = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35);
-
- if ((reg_status & 0xFFFF) != 0) {
- DRM_ERROR("Lower address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %02x...\n",
- reg_status & 0xFFFF);
- return -EIO;
- }
-
- /* Write upper 32-bit address of the PD Controller FW */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, upper_32_bits(dma_addr));
+ /*
+ * LFB address which is aligned to 1MB address and has to be
+ * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P
+ * register
+ */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
- /* Fireup interrupt so PSP can pick up the upper address */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, 0x4000000);
+ /* Fireup interrupt so PSP can pick up the address */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16));
/* FW load takes very long time */
do {
@@ -791,7 +640,7 @@ static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, dma_addr_t dma_add
done:
if ((reg_status & 0xFFFF) != 0) {
- DRM_ERROR("Upper address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = x%04x\n",
+ DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = 0x%04x\n",
reg_status & 0xFFFF);
return -EIO;
}
@@ -807,7 +656,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (!ret)
*fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36);
@@ -820,7 +669,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
.bootloader_load_spl = psp_v11_0_bootloader_load_spl,
.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v11_0_bootloader_load_sos,
- .ring_init = psp_v11_0_ring_init,
.ring_create = psp_v11_0_ring_create,
.ring_stop = psp_v11_0_ring_stop,
.ring_destroy = psp_v11_0_ring_destroy,
@@ -829,7 +677,8 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_get_wptr = psp_v11_0_ring_get_wptr,
.ring_set_wptr = psp_v11_0_ring_set_wptr,
.load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw,
- .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw
+ .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw,
+ .wait_for_bootloader = psp_v11_0_wait_for_bootloader
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
new file mode 100644
index 000000000000..93787a90d598
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v11_0_8.h"
+
+#include "mp/mp_11_0_8_offset.h"
+
+static int psp_v11_0_8_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ } else {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ }
+
+ return ret;
+}
+
+static int psp_v11_0_8_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ret = psp_v11_0_8_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v11_0_8_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
+ if (ret) {
+ DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ }
+
+ return ret;
+}
+
+static int psp_v11_0_8_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v11_0_8_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static uint32_t psp_v11_0_8_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v11_0_8_funcs = {
+ .ring_create = psp_v11_0_8_ring_create,
+ .ring_stop = psp_v11_0_8_ring_stop,
+ .ring_destroy = psp_v11_0_8_ring_destroy,
+ .ring_get_wptr = psp_v11_0_8_ring_get_wptr,
+ .ring_set_wptr = psp_v11_0_8_ring_set_wptr,
+};
+
+void psp_v11_0_8_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v11_0_8_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h
new file mode 100644
index 000000000000..890377a5afe0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V11_0_8_H__
+#define __PSP_V11_0_8_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v11_0_8_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index c4828bd3264b..4c6450d62299 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -34,9 +34,6 @@
#include "sdma0/sdma0_4_0_offset.h"
#include "nbio/nbio_7_4_offset.h"
-#include "oss/osssys_4_0_offset.h"
-#include "oss/osssys_4_0_sh_mask.h"
-
MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
MODULE_FIRMWARE("amdgpu/renoir_ta.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin");
@@ -48,74 +45,25 @@ MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin");
static int psp_v12_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
- char fw_name[30];
+ char ucode_prefix[30];
int err = 0;
- const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
- switch (adev->asic_type) {
- case CHIP_RENOIR:
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- chip_name = "renoir";
- else
- chip_name = "green_sardine";
- break;
- default:
- BUG();
- }
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
- goto out;
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v12.0: Failed to load firmware \"%s\"\n",
- fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out2;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)
- adev->psp.ta_fw->data;
- adev->psp.ta_hdcp_ucode_version =
- le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
- adev->psp.ta_hdcp_ucode_size =
- le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
- adev->psp.ta_hdcp_start_addr =
- (uint8_t *)ta_hdr +
- le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
-
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
-
- adev->psp.ta_dtm_ucode_version =
- le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
- adev->psp.ta_dtm_ucode_size =
- le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
- adev->psp.ta_dtm_start_addr =
- (uint8_t *)adev->psp.ta_hdcp_start_addr +
- le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
- }
+ return err;
- return 0;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
-out2:
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
-out:
- if (err) {
- dev_err(adev->dev,
- "psp v12.0: Failed to load firmware \"%s\"\n",
- fw_name);
- }
+ /* only supported on renoir */
+ if (!(adev->apu_flags & AMD_APU_IS_RENOIR))
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
- return err;
+ return 0;
}
static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
@@ -134,14 +82,12 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
-
/* Copy PSP System Driver binary to memory */
- memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
+ psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes);
/* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
@@ -150,11 +96,8 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -175,14 +118,12 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
-
/* Copy Secure OS binary to PSP memory */
- memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
+ psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes);
/* Provide the PSP secure OS to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
@@ -191,74 +132,13 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
-static void psp_v12_0_reroute_ih(struct psp_context *psp)
-{
- struct amdgpu_device *adev = psp->adev;
- uint32_t tmp;
-
- /* Change IH ring for VMC */
- tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
-
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
-
- mdelay(20);
- psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
-
- /* Change IH ring for UMC */
- tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
-
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
-
- mdelay(20);
- psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
-}
-
-static int psp_v12_0_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- psp_v12_0_reroute_ih(psp);
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static int psp_v12_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -267,47 +147,23 @@ static int psp_v12_0_ring_create(struct psp_context *psp,
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
- if (amdgpu_sriov_vf(psp->adev)) {
- /* Write low address of the ring to C2PMSG_102 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_103 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
-
- /* Write the ring initialization command to C2PMSG_101 */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
- GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
-
- } else {
- /* Write low address of the ring to C2PMSG_69 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_70 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
- /* Write size of ring to C2PMSG_71 */
- psp_ring_reg = ring->ring_size;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
- /* Write the ring initialization command to C2PMSG_64 */
- psp_ring_reg = ring_type;
- psp_ring_reg = psp_ring_reg << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
- }
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -326,16 +182,15 @@ static int psp_v12_0_ring_stop(struct psp_context *psp,
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
GFX_CTRL_CMD_ID_DESTROY_RINGS);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -366,7 +221,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG,
+ MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -380,7 +236,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK,
+ 0);
if (ret) {
DRM_INFO("psp mode 1 reset failed!\n");
@@ -420,7 +277,6 @@ static const struct psp_funcs psp_v12_0_funcs = {
.init_microcode = psp_v12_0_init_microcode,
.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v12_0_bootloader_load_sos,
- .ring_init = psp_v12_0_ring_init,
.ring_create = psp_v12_0_ring_create,
.ring_stop = psp_v12_0_ring_stop,
.ring_destroy = psp_v12_0_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
new file mode 100644
index 000000000000..af4a7d7c4abd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -0,0 +1,977 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v13_0.h"
+#include "amdgpu_ras.h"
+
+#include "mp/mp_13_0_2_offset.h"
+#include "mp/mp_13_0_2_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_1_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_1_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin");
+
+/* For large FW files the time to complete can be very long */
+#define USBC_PD_POLLING_LIMIT_S 240
+
+/* Read USB-PD from LFB */
+#define GFX_CMD_USB_PD_USE_LFB 0x480
+
+/* Retry times for vmbx ready wait */
+#define PSP_VMBX_POLLING_LIMIT 3000
+
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+
+#define regMP1_PUB_SCRATCH0 0x3b10090
+
+#define PSP13_BL_STATUS_SIZE 100
+
+static int psp_v13_0_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ /* It's not necessary to load ras ta on Guest side */
+ if (!amdgpu_sriov_vf(adev)) {
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ }
+ break;
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
+ err = psp_init_toc_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ /* It's not necessary to load ras ta on Guest side */
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
+static bool psp_v13_0_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
+static void psp_v13_0_bootloader_print_status(struct psp_context *psp,
+ const char *msg)
+{
+ struct amdgpu_device *adev = psp->adev;
+ u32 bl_status_reg;
+ char bl_status_msg[PSP13_BL_STATUS_SIZE];
+ int i, at;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+ at = 0;
+ for_each_inst(i, adev->aid_mask) {
+ bl_status_reg =
+ (SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92)
+ << 2) +
+ adev->asic_funcs->encode_ext_smn_addressing(i);
+ at += snprintf(bl_status_msg + at,
+ PSP13_BL_STATUS_SIZE - at,
+ " status(%02i): 0x%08x", i,
+ RREG32_PCIE_EXT(bl_status_reg));
+ }
+ dev_info(adev->dev, "%s - %s", msg, bl_status_msg);
+ }
+}
+
+static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int retry_loop, ret;
+
+ for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_33 set to 1 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33),
+ 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE);
+
+ if (ret == 0)
+ break;
+ }
+
+ if (ret)
+ dev_warn(adev->dev, "Bootloader wait timed out");
+
+ return ret;
+}
+
+static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int retry_loop, retry_cnt, ret;
+
+ retry_cnt =
+ ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ?
+ PSP_VMBX_POLLING_LIMIT :
+ 10;
+ /* Wait for bootloader to signify that it is ready having bit 31 of
+ * C2PMSG_35 set to 1. All other bits are expected to be cleared.
+ * If there is an error in processing command, bits[7:0] will be set.
+ * This is applicable for PSP v13.0.6 and newer.
+ */
+ for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) {
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE);
+
+ if (ret == 0)
+ return 0;
+ if (retry_loop && !(retry_loop % 10))
+ psp_v13_0_bootloader_print_status(
+ psp, "Waiting for bootloader completion");
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+ ret = psp_v13_0_wait_for_vmbx_ready(psp);
+ if (ret)
+ amdgpu_ras_query_boot_status(adev, 4);
+
+ ret = psp_v13_0_wait_for_bootloader(psp);
+ if (ret)
+ amdgpu_ras_query_boot_status(adev, 4);
+
+ return ret;
+ }
+
+ return 0;
+}
+
+static int psp_v13_0_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check tOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v13_0_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy PSP KDB binary to memory */
+ memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+ /* Provide the PSP KDB to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = bl_cmd;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ ret = psp_v13_0_wait_for_bootloader(psp);
+
+ return ret;
+}
+
+static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+static int psp_v13_0_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
+}
+
+static int psp_v13_0_bootloader_load_soc_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV);
+}
+
+static int psp_v13_0_bootloader_load_intf_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV);
+}
+
+static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV);
+}
+
+static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
+}
+
+static int psp_v13_0_bootloader_load_spdm_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->spdm_drv, PSP_BL__LOAD_SPDMDRV);
+}
+
+static inline void psp_v13_0_init_sos_version(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ psp->sos.fw_version = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_58);
+}
+
+static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v13_0_is_sos_alive(psp)) {
+ psp_v13_0_init_sos_version(psp);
+ return 0;
+ }
+
+ ret = psp_v13_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy Secure OS binary to PSP memory */
+ memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
+
+ if (!ret)
+ psp_v13_0_init_sos_version(psp);
+
+ return ret;
+}
+
+static int psp_v13_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ } else {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ret = psp_v13_0_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v13_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
+ if (ret) {
+ DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v13_0_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static uint32_t psp_v13_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v13_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value);
+}
+
+static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+ int ret;
+ int i;
+ uint32_t data_32;
+ int max_wait;
+ struct amdgpu_device *adev = psp->adev;
+
+ data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, data_32);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, msg);
+
+ max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+ for (i = 0; i < max_wait; i++) {
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
+ if (ret == 0)
+ break;
+ }
+ if (i < max_wait)
+ ret = 0;
+ else
+ ret = -ETIME;
+
+ dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n",
+ (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+ (ret == 0) ? "succeed" : "failed",
+ i, adev->usec_timeout/1000);
+ return ret;
+}
+
+
+static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+ uint32_t *pcache = (uint32_t *)ctx->sys_cache;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t p2c_header[4];
+ uint32_t sz;
+ void *buf;
+ int ret, idx;
+
+ if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+ dev_dbg(adev->dev, "Memory training is not supported.\n");
+ return 0;
+ } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+ dev_err(adev->dev, "Memory training initialization failure.\n");
+ return -EINVAL;
+ }
+
+ if (psp_v13_0_is_sos_alive(psp)) {
+ dev_dbg(adev->dev, "SOS is alive, skip memory training.\n");
+ return 0;
+ }
+
+ amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+ dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+ pcache[0], pcache[1], pcache[2], pcache[3],
+ p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ dev_dbg(adev->dev, "Short training depends on restore.\n");
+ ops |= PSP_MEM_TRAIN_RESTORE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+ pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ pcache[3] == p2c_header[3])) {
+ dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_SAVE) &&
+ p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n");
+ ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ dev_dbg(adev->dev, "Memory training ops:%x.\n", ops);
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ /*
+ * Long training will encroach a certain amount on the bottom of VRAM;
+ * save the content from the bottom of VRAM to system memory
+ * before training, and restore it after training to avoid
+ * VRAM corruption.
+ */
+ sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
+
+ if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+ dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+ return -EINVAL;
+ }
+
+ buf = vmalloc(sz);
+ if (!buf) {
+ dev_err(adev->dev, "failed to allocate system memory.\n");
+ return -ENOMEM;
+ }
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v13_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ drm_dev_exit(idx);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ amdgpu_device_flush_hdp(adev, NULL);
+ vfree(buf);
+ drm_dev_exit(idx);
+ } else {
+ vfree(buf);
+ return -ENODEV;
+ }
+ }
+
+ if (ops & PSP_MEM_TRAIN_SAVE) {
+ amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+ }
+
+ if (ops & PSP_MEM_TRAIN_RESTORE) {
+ amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ ret = psp_v13_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+ PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+ if (ret) {
+ dev_err(adev->dev, "send training msg failed.\n");
+ return ret;
+ }
+ }
+ ctx->training_cnt++;
+ return 0;
+}
+
+static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t reg_status;
+ int ret, i = 0;
+
+ /*
+ * LFB address which is aligned to 1MB address and has to be
+ * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P
+ * register
+ */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, 0);
+ if (ret)
+ return ret;
+
+ /* Fireup interrupt so PSP can pick up the address */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16));
+
+ /* FW load takes very long time */
+ do {
+ msleep(1000);
+ reg_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35);
+
+ if (reg_status & 0x80000000)
+ goto done;
+
+ } while (++i < USBC_PD_POLLING_LIMIT_S);
+
+ return -ETIME;
+done:
+
+ if ((reg_status & 0xFFFF) != 0) {
+ DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n",
+ reg_status & 0xFFFF);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, 0);
+ if (!ret)
+ *fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36);
+
+ return ret;
+}
+
+static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd)
+{
+ uint32_t reg_status = 0, reg_val = 0;
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ /* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */
+ reg_val |= (cmd << 16);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115, reg_val);
+
+ /* Ring the doorbell */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1);
+
+ if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE ||
+ cmd == C2PMSG_CMD_SPI_GET_FLASH_IMAGE)
+ ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT);
+ else
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+ if (ret) {
+ dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret);
+ return ret;
+ }
+
+ reg_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115);
+ if ((reg_status & 0xFFFF) != 0) {
+ dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n",
+ cmd, reg_status & 0xFFFF);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int psp_v13_0_update_spirom(struct psp_context *psp,
+ uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ /* Confirm PSP is ready to start */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+ if (ret) {
+ dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret);
+ return ret;
+ }
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO);
+ if (ret)
+ return ret;
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI);
+ if (ret)
+ return ret;
+
+ psp->vbflash_done = true;
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int psp_v13_0_dump_spirom(struct psp_context *psp,
+ uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ /* Confirm PSP is ready to start */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+ if (ret) {
+ dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret);
+ return ret;
+ }
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO);
+ if (ret)
+ return ret;
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI);
+ if (ret)
+ return ret;
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_FLASH_IMAGE);
+
+ return ret;
+}
+
+static int psp_v13_0_vbflash_status(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ return RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115);
+}
+
+static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 10)) {
+ uint32_t reg_data;
+ /* MP1 fatal error: trigger PSP dram read to unhalt PSP
+ * during MP1 triggered sync flood.
+ */
+ reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, reg_data + 0x10);
+
+ /* delay 1000ms for the mode1 reset for fatal error
+ * to be recovered back.
+ */
+ msleep(1000);
+ }
+
+ return 0;
+}
+
+static bool psp_v13_0_get_ras_capability(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ u32 reg_data;
+
+ /* query ras cap should be done from host side */
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+ if (!con)
+ return false;
+
+ if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) &&
+ (!(adev->flags & AMD_IS_APU))) {
+ reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127);
+ adev->ras_hw_enabled = (reg_data & GENMASK_ULL(23, 0));
+ con->poison_supported = ((reg_data & GENMASK_ULL(24, 24)) >> 24) ? true : false;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ u32 pmfw_ver;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
+ return false;
+
+ /* load 4e version of sos if pmfw version less than 85.115.0 */
+ pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4);
+
+ return (pmfw_ver < 0x557300);
+}
+
+static bool psp_v13_0_is_reload_needed(struct psp_context *psp)
+{
+ uint32_t ucode_ver;
+
+ if (!psp_v13_0_is_sos_alive(psp))
+ return false;
+
+ /* Restrict reload support only to specific IP versions */
+ switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ /* TOS version read from microcode header */
+ ucode_ver = psp->sos.fw_version;
+ /* Read TOS version from hardware */
+ psp_v13_0_init_sos_version(psp);
+ return (ucode_ver != psp->sos.fw_version);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret = -EOPNOTSUPP;
+
+ /* PSP will broadcast the value to all instances */
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_GBR_IH_SET);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val);
+
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, 0);
+ }
+
+ return ret;
+}
+
+static const struct psp_funcs psp_v13_0_funcs = {
+ .init_microcode = psp_v13_0_init_microcode,
+ .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
+ .bootloader_load_kdb = psp_v13_0_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v13_0_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv,
+ .bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv,
+ .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv,
+ .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,
+ .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,
+ .bootloader_load_spdm_drv = psp_v13_0_bootloader_load_spdm_drv,
+ .bootloader_load_sos = psp_v13_0_bootloader_load_sos,
+ .ring_create = psp_v13_0_ring_create,
+ .ring_stop = psp_v13_0_ring_stop,
+ .ring_destroy = psp_v13_0_ring_destroy,
+ .ring_get_wptr = psp_v13_0_ring_get_wptr,
+ .ring_set_wptr = psp_v13_0_ring_set_wptr,
+ .mem_training = psp_v13_0_memory_training,
+ .load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw,
+ .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw,
+ .update_spirom = psp_v13_0_update_spirom,
+ .dump_spirom = psp_v13_0_dump_spirom,
+ .vbflash_stat = psp_v13_0_vbflash_status,
+ .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
+ .get_ras_capability = psp_v13_0_get_ras_capability,
+ .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required,
+ .is_reload_needed = psp_v13_0_is_reload_needed,
+ .reg_program_no_ring = psp_v13_0_reg_program_no_ring,
+};
+
+void psp_v13_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v13_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
new file mode 100644
index 000000000000..de5677ce4330
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V13_0_H__
+#define __PSP_V13_0_H__
+
+#include "amdgpu_psp.h"
+
+#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */
+
+void psp_v13_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
new file mode 100644
index 000000000000..5f39a2edcc95
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v13_0_4.h"
+
+#include "mp/mp_13_0_4_offset.h"
+#include "mp/mp_13_0_4_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_ta.bin");
+
+static int psp_v13_0_4_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 4):
+ err = psp_init_toc_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
+static bool psp_v13_0_4_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
+static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ int ret;
+ int retry_loop;
+
+ for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
+
+ if (ret == 0)
+ return 0;
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check tOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v13_0_4_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy PSP KDB binary to memory */
+ memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+ /* Provide the PSP KDB to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = bl_cmd;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+
+ return ret;
+}
+
+static int psp_v13_0_4_bootloader_load_kdb(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+static int psp_v13_0_4_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+static int psp_v13_0_4_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_soc_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_intf_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v13_0_4_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy Secure OS binary to PSP memory */
+ memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ } else {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ret = psp_v13_0_4_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v13_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
+ if (ret) {
+ DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v13_0_4_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static uint32_t psp_v13_0_4_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v13_0_4_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v13_0_4_funcs = {
+ .init_microcode = psp_v13_0_4_init_microcode,
+ .bootloader_load_kdb = psp_v13_0_4_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v13_0_4_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v13_0_4_bootloader_load_sysdrv,
+ .bootloader_load_soc_drv = psp_v13_0_4_bootloader_load_soc_drv,
+ .bootloader_load_intf_drv = psp_v13_0_4_bootloader_load_intf_drv,
+ .bootloader_load_dbg_drv = psp_v13_0_4_bootloader_load_dbg_drv,
+ .bootloader_load_sos = psp_v13_0_4_bootloader_load_sos,
+ .ring_create = psp_v13_0_4_ring_create,
+ .ring_stop = psp_v13_0_4_ring_stop,
+ .ring_destroy = psp_v13_0_4_ring_destroy,
+ .ring_get_wptr = psp_v13_0_4_ring_get_wptr,
+ .ring_set_wptr = psp_v13_0_4_ring_set_wptr,
+};
+
+void psp_v13_0_4_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v13_0_4_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h
new file mode 100644
index 000000000000..8547b8d514d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V13_0_4_H__
+#define __PSP_V13_0_4_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v13_0_4_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
new file mode 100644
index 000000000000..38dfc5c19f2a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
@@ -0,0 +1,705 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v14_0.h"
+
+#include "mp/mp_14_0_2_offset.h"
+#include "mp/mp_14_0_2_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin");
+
+/* For large FW files the time to complete can be very long */
+#define USBC_PD_POLLING_LIMIT_S 240
+
+/* Read USB-PD from LFB */
+#define GFX_CMD_USB_PD_USE_LFB 0x480
+
+/* VBIOS gfl defines */
+#define MBOX_READY_MASK 0x80000000
+#define MBOX_STATUS_MASK 0x0000FFFF
+#define MBOX_COMMAND_MASK 0x00FF0000
+#define MBOX_READY_FLAG 0x80000000
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+
+static int psp_v14_0_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
+ case IP_VERSION(14, 0, 5):
+ err = psp_init_toc_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
+static bool psp_v14_0_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
+static int psp_v14_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ int ret;
+ int retry_loop;
+
+ for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
+
+ if (ret == 0)
+ return 0;
+ }
+
+ return ret;
+}
+
+static int psp_v14_0_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check tOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v14_0_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v14_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy PSP KDB binary to memory */
+ memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+ /* Provide the PSP KDB to bootloader */
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = bl_cmd;
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ ret = psp_v14_0_wait_for_bootloader(psp);
+
+ return ret;
+}
+
+static int psp_v14_0_bootloader_load_kdb(struct psp_context *psp)
+{
+ return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+static int psp_v14_0_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v14_0_bootloader_load_component(psp, &psp->spl, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+static int psp_v14_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ return psp_v14_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
+}
+
+static int psp_v14_0_bootloader_load_soc_drv(struct psp_context *psp)
+{
+ return psp_v14_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV);
+}
+
+static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp)
+{
+ return psp_v14_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV);
+}
+
+static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+ /* dbg_drv was renamed to had_drv in psp v14 */
+ return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_HADDRV);
+}
+
+static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp)
+{
+ return psp_v14_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
+}
+
+static int psp_v14_0_bootloader_load_ipkeymgr_drv(struct psp_context *psp)
+{
+ return psp_v14_0_bootloader_load_component(psp, &psp->ipkeymgr_drv, PSP_BL__LOAD_IPKEYMGRDRV);
+}
+
+static int psp_v14_0_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v14_0_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v14_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy Secure OS binary to PSP memory */
+ memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV;
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
+
+ return ret;
+}
+
+static int psp_v14_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ } else {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ }
+
+ return ret;
+}
+
+static int psp_v14_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ret = psp_v14_0_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v14_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
+ if (ret) {
+ DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ }
+
+ return ret;
+}
+
+static int psp_v14_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v14_0_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static uint32_t psp_v14_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v14_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value);
+}
+
+static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+ int ret;
+ int i;
+ uint32_t data_32;
+ int max_wait;
+ struct amdgpu_device *adev = psp->adev;
+
+ data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, data_32);
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, msg);
+
+ max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+ for (i = 0; i < max_wait; i++) {
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
+ if (ret == 0)
+ break;
+ }
+ if (i < max_wait)
+ ret = 0;
+ else
+ ret = -ETIME;
+
+ dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n",
+ (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+ (ret == 0) ? "succeed" : "failed",
+ i, adev->usec_timeout/1000);
+ return ret;
+}
+
+
+static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+ uint32_t *pcache = (uint32_t *)ctx->sys_cache;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t p2c_header[4];
+ uint32_t sz;
+ void *buf;
+ int ret, idx;
+
+ if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+ dev_dbg(adev->dev, "Memory training is not supported.\n");
+ return 0;
+ } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+ dev_err(adev->dev, "Memory training initialization failure.\n");
+ return -EINVAL;
+ }
+
+ if (psp_v14_0_is_sos_alive(psp)) {
+ dev_dbg(adev->dev, "SOS is alive, skip memory training.\n");
+ return 0;
+ }
+
+ amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+ dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+ pcache[0], pcache[1], pcache[2], pcache[3],
+ p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ dev_dbg(adev->dev, "Short training depends on restore.\n");
+ ops |= PSP_MEM_TRAIN_RESTORE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+ pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ pcache[3] == p2c_header[3])) {
+ dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_SAVE) &&
+ p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n");
+ ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ dev_dbg(adev->dev, "Memory training ops:%x.\n", ops);
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ /*
+ * Long training will encroach a certain amount on the bottom of VRAM;
+ * save the content from the bottom of VRAM to system memory
+ * before training, and restore it after training to avoid
+ * VRAM corruption.
+ */
+ sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
+
+ if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+ dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+ return -EINVAL;
+ }
+
+ buf = vmalloc(sz);
+ if (!buf) {
+ dev_err(adev->dev, "failed to allocate system memory.\n");
+ return -ENOMEM;
+ }
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v14_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ drm_dev_exit(idx);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ amdgpu_device_flush_hdp(adev, NULL);
+ vfree(buf);
+ drm_dev_exit(idx);
+ } else {
+ vfree(buf);
+ return -ENODEV;
+ }
+ }
+
+ if (ops & PSP_MEM_TRAIN_SAVE) {
+ amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+ }
+
+ if (ops & PSP_MEM_TRAIN_RESTORE) {
+ amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ ret = psp_v14_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+ PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+ if (ret) {
+ dev_err(adev->dev, "send training msg failed.\n");
+ return ret;
+ }
+ }
+ ctx->training_cnt++;
+ return 0;
+}
+
+static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t reg_status;
+ int ret, i = 0;
+
+ /*
+ * LFB address which is aligned to 1MB address and has to be
+ * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P
+ * register
+ */
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
+
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, 0);
+ if (ret)
+ return ret;
+
+ /* Fireup interrupt so PSP can pick up the address */
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16));
+
+ /* FW load takes very long time */
+ do {
+ msleep(1000);
+ reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35);
+
+ if (reg_status & 0x80000000)
+ goto done;
+
+ } while (++i < USBC_PD_POLLING_LIMIT_S);
+
+ return -ETIME;
+done:
+
+ if ((reg_status & 0xFFFF) != 0) {
+ DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n",
+ reg_status & 0xFFFF);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
+
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, 0);
+ if (!ret)
+ *fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36);
+
+ return ret;
+}
+
+static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd)
+{
+ uint32_t reg_status = 0, reg_val = 0;
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ /* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */
+ reg_val |= (cmd << 16);
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115, reg_val);
+
+ /* Ring the doorbell */
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_73, 1);
+
+ if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE)
+ ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT);
+ else
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+ if (ret) {
+ dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret);
+ return ret;
+ }
+
+ reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115);
+ if ((reg_status & 0xFFFF) != 0) {
+ dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n",
+ cmd, reg_status & 0xFFFF);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int psp_v14_0_update_spirom(struct psp_context *psp,
+ uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ /* Confirm PSP is ready to start */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+ if (ret) {
+ dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret);
+ return ret;
+ }
+
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO);
+ if (ret)
+ return ret;
+
+ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI);
+ if (ret)
+ return ret;
+
+ psp->vbflash_done = true;
+
+ ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int psp_v14_0_vbflash_status(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115);
+}
+
+static const struct psp_funcs psp_v14_0_funcs = {
+ .init_microcode = psp_v14_0_init_microcode,
+ .bootloader_load_kdb = psp_v14_0_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v14_0_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v14_0_bootloader_load_sysdrv,
+ .bootloader_load_soc_drv = psp_v14_0_bootloader_load_soc_drv,
+ .bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv,
+ .bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv,
+ .bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv,
+ .bootloader_load_ipkeymgr_drv = psp_v14_0_bootloader_load_ipkeymgr_drv,
+ .bootloader_load_sos = psp_v14_0_bootloader_load_sos,
+ .ring_create = psp_v14_0_ring_create,
+ .ring_stop = psp_v14_0_ring_stop,
+ .ring_destroy = psp_v14_0_ring_destroy,
+ .ring_get_wptr = psp_v14_0_ring_get_wptr,
+ .ring_set_wptr = psp_v14_0_ring_set_wptr,
+ .mem_training = psp_v14_0_memory_training,
+ .load_usbc_pd_fw = psp_v14_0_load_usbc_pd_fw,
+ .read_usbc_pd_fw = psp_v14_0_read_usbc_pd_fw,
+ .update_spirom = psp_v14_0_update_spirom,
+ .vbflash_stat = psp_v14_0_vbflash_status
+};
+
+void psp_v14_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v14_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h
new file mode 100644
index 000000000000..dd18ba2cfad5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V14_0_H__
+#define __PSP_V14_0_H__
+
+#include "amdgpu_psp.h"
+
+#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */
+
+void psp_v14_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index f2e725f72d2f..833830bc3e2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -44,6 +44,7 @@
MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega10_cap.bin");
MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
@@ -56,26 +57,18 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
static int psp_v3_1_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
+ char ucode_prefix[30];
int err = 0;
DRM_DEBUG("\n");
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- chip_name = "vega10";
- break;
- case CHIP_VEGA12:
- chip_name = "vega12";
- break;
- default: BUG();
- }
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = psp_init_sos_microcode(psp, chip_name);
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
@@ -98,14 +91,12 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
-
/* Copy PSP System Driver binary to memory */
- memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
+ psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes);
/* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
@@ -118,7 +109,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -139,14 +130,12 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
- memset(psp->fw_pri_buf, 0, PSP_1_MEG);
-
/* Copy Secure OS binary to PSP memory */
- memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
+ psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes);
/* Provide the PSP secure OS to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
@@ -158,37 +147,11 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
-static int psp_v3_1_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static void psp_v3_1_reroute_ih(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -205,7 +168,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp)
mdelay(20);
psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ 0x80000000, 0x8000FFFF, 0);
/* Change IH ring for UMC */
tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
@@ -217,7 +180,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp)
mdelay(20);
psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ 0x80000000, 0x8000FFFF, 0);
}
static int psp_v3_1_ring_create(struct psp_context *psp,
@@ -231,6 +194,7 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
psp_v3_1_reroute_ih(psp);
if (amdgpu_sriov_vf(adev)) {
+ ring->ring_wptr = 0;
ret = psp_v3_1_ring_stop(psp, ring_type);
if (ret) {
DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
@@ -253,9 +217,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
- mmMP0_SMN_C2PMSG_101), 0x80000000,
- 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, 0);
} else {
/* Write low address of the ring to C2PMSG_69 */
@@ -276,10 +240,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
- mmMP0_SMN_C2PMSG_64), 0x80000000,
- 0x8000FFFF, false);
-
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, 0);
}
return ret;
}
@@ -303,11 +266,13 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -347,7 +312,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -361,7 +326,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, 0);
if (ret) {
DRM_INFO("psp mode 1 reset failed!\n");
@@ -379,7 +344,7 @@ static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp)
struct amdgpu_device *adev = psp->adev;
if (amdgpu_sriov_vf(adev))
- data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ data = psp->km_ring.ring_wptr;
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
return data;
@@ -394,6 +359,7 @@ static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value)
/* send interrupt to PSP for SRIOV ring write pointer update */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
GFX_CTRL_CMD_ID_CONSUME_CMD);
+ psp->km_ring.ring_wptr = value;
} else
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
}
@@ -402,7 +368,6 @@ static const struct psp_funcs psp_v3_1_funcs = {
.init_microcode = psp_v3_1_init_microcode,
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v3_1_bootloader_load_sos,
- .ring_init = psp_v3_1_ring_init,
.ring_create = psp_v3_1_ring_create,
.ring_stop = psp_v3_1_ring_stop,
.ring_destroy = psp_v3_1_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index eb5dc6c5b46e..92ce580647cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -57,22 +57,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");
-static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
-{
+static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = {
SDMA0_REGISTER_OFFSET,
SDMA1_REGISTER_OFFSET
};
-static const u32 golden_settings_iceland_a11[] =
-{
+static const u32 golden_settings_iceland_a11[] = {
mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
};
-static const u32 iceland_mgcg_cgcg_init[] =
-{
+static const u32 iceland_mgcg_cgcg_init[] = {
mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};
@@ -113,10 +110,9 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
/**
@@ -131,7 +127,6 @@ static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -143,18 +138,19 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
case CHIP_TOPAZ:
chip_name = "topaz";
break;
- default: BUG();
+ default:
+ BUG();
}
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
@@ -175,11 +171,10 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ pr_err("sdma_v2_4: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
return err;
}
@@ -194,7 +189,7 @@ out:
static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->rptr_offs] >> 2;
+ return *ring->rptr_cpu_addr >> 2;
}
/**
@@ -223,7 +218,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
}
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -271,7 +266,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
}
/**
- * sdma_v2_4_hdp_flush_ring_emit - emit an hdp flush on the DMA ring
+ * sdma_v2_4_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
*
* @ring: amdgpu ring pointer
*
@@ -342,15 +337,9 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -414,12 +403,10 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
- u32 wb_offset;
int i, j, r;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@@ -455,9 +442,9 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
@@ -465,7 +452,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
/* enable DMA RB */
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
@@ -478,8 +465,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
sdma_v2_4_enable(adev, true);
@@ -488,9 +473,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -651,7 +633,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -827,12 +809,17 @@ static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int sdma_v2_4_early_init(void *handle)
+static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+ r = sdma_v2_4_init_microcode(adev);
+ if (r)
+ return r;
+
sdma_v2_4_set_ring_funcs(adev);
sdma_v2_4_set_buffer_funcs(adev);
sdma_v2_4_set_vm_pte_funcs(adev);
@@ -841,11 +828,11 @@ static int sdma_v2_4_early_init(void *handle)
return 0;
}
-static int sdma_v2_4_sw_init(void *handle)
+static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
@@ -865,23 +852,15 @@ static int sdma_v2_4_sw_init(void *handle)
if (r)
return r;
- r = sdma_v2_4_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = false;
sprintf(ring->name, "sdma%d", i);
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->sdma.trap_irq,
- (i == 0) ?
- AMDGPU_SDMA_IRQ_INSTANCE0 :
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
AMDGPU_SDMA_IRQ_INSTANCE1,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -889,9 +868,9 @@ static int sdma_v2_4_sw_init(void *handle)
return r;
}
-static int sdma_v2_4_sw_fini(void *handle)
+static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -901,10 +880,10 @@ static int sdma_v2_4_sw_fini(void *handle)
return 0;
}
-static int sdma_v2_4_hw_init(void *handle)
+static int sdma_v2_4_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v2_4_init_golden_registers(adev);
@@ -915,32 +894,26 @@ static int sdma_v2_4_hw_init(void *handle)
return r;
}
-static int sdma_v2_4_hw_fini(void *handle)
+static int sdma_v2_4_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- sdma_v2_4_enable(adev, false);
+ sdma_v2_4_enable(ip_block->adev, false);
return 0;
}
-static int sdma_v2_4_suspend(void *handle)
+static int sdma_v2_4_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v2_4_hw_fini(adev);
+ return sdma_v2_4_hw_fini(ip_block);
}
-static int sdma_v2_4_resume(void *handle)
+static int sdma_v2_4_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v2_4_hw_init(adev);
+ return sdma_v2_4_hw_init(ip_block);
}
-static bool sdma_v2_4_is_idle(void *handle)
+static bool sdma_v2_4_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -950,11 +923,11 @@ static bool sdma_v2_4_is_idle(void *handle)
return true;
}
-static int sdma_v2_4_wait_for_idle(void *handle)
+static int sdma_v2_4_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -967,10 +940,10 @@ static int sdma_v2_4_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v2_4_soft_reset(void *handle)
+static int sdma_v2_4_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
@@ -1109,14 +1082,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v2_4_set_clockgating_state(void *handle,
+static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* XXX handled via the smc on VI */
return 0;
}
-static int sdma_v2_4_set_powergating_state(void *handle,
+static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1125,7 +1098,6 @@ static int sdma_v2_4_set_powergating_state(void *handle,
static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
.name = "sdma_v2_4",
.early_init = sdma_v2_4_early_init,
- .late_init = NULL,
.sw_init = sdma_v2_4_sw_init,
.sw_fini = sdma_v2_4_sw_fini,
.hw_init = sdma_v2_4_hw_init,
@@ -1144,6 +1116,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = false,
+ .secure_submission_supported = true,
.get_rptr = sdma_v2_4_ring_get_rptr,
.get_wptr = sdma_v2_4_ring_get_wptr,
.set_wptr = sdma_v2_4_ring_set_wptr,
@@ -1199,7 +1172,7 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: unused
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -1209,7 +1182,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
@@ -1279,8 +1252,7 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
-const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
-{
+const struct amdgpu_ip_block_version sdma_v2_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 2,
.minor = 4,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index ad308d8c6d30..1c076bd1cf73 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -250,10 +250,9 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
/**
@@ -268,7 +267,6 @@ static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -306,13 +304,13 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
@@ -331,11 +329,10 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
}
out:
if (err) {
- pr_err("sdma_v3_0: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ pr_err("sdma_v3_0: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
return err;
}
@@ -350,7 +347,7 @@ out:
static uint64_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->rptr_offs] >> 2;
+ return *ring->rptr_cpu_addr >> 2;
}
/**
@@ -367,7 +364,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell || ring->use_pollmem) {
/* XXX check if swapping is necessary on BE */
- wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
+ wptr = *ring->wptr_cpu_addr >> 2;
} else {
wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
}
@@ -387,16 +384,16 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
+ u32 *wb = (u32 *)ring->wptr_cpu_addr;
/* XXX check if swapping is necessary on BE */
- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
+ WRITE_ONCE(*wb, ring->wptr << 2);
+ WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
} else if (ring->use_pollmem) {
- u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
+ u32 *wb = (u32 *)ring->wptr_cpu_addr;
- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
+ WRITE_ONCE(*wb, ring->wptr << 2);
} else {
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
}
}
@@ -516,15 +513,9 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -649,7 +640,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
u32 rb_bufsz;
- u32 wb_offset;
u32 doorbell;
u64 wptr_gpu_addr;
int i, j, r;
@@ -657,7 +647,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
amdgpu_ring_clear_ring(ring);
- wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@@ -694,9 +683,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
@@ -715,7 +704,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell);
/* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO + sdma_offsets[i],
lower_32_bits(wptr_gpu_addr));
@@ -723,7 +712,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
if (ring->use_pollmem) {
- /*wptr polling is not enogh fast, directly clean the wptr register */
+ /*wptr polling is not enough fast, directly clean the wptr register */
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_GFX_RB_WPTR_POLL_CNTL,
@@ -746,8 +735,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
/* unhalt the MEs */
@@ -760,9 +747,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -922,7 +906,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1098,9 +1082,10 @@ static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int sdma_v3_0_early_init(void *handle)
+static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
switch (adev->asic_type) {
case CHIP_STONEY:
@@ -1111,6 +1096,10 @@ static int sdma_v3_0_early_init(void *handle)
break;
}
+ r = sdma_v3_0_init_microcode(adev);
+ if (r)
+ return r;
+
sdma_v3_0_set_ring_funcs(adev);
sdma_v3_0_set_buffer_funcs(adev);
sdma_v3_0_set_vm_pte_funcs(adev);
@@ -1119,11 +1108,11 @@ static int sdma_v3_0_early_init(void *handle)
return 0;
}
-static int sdma_v3_0_sw_init(void *handle)
+static int sdma_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
@@ -1143,12 +1132,6 @@ static int sdma_v3_0_sw_init(void *handle)
if (r)
return r;
- r = sdma_v3_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -1160,12 +1143,10 @@ static int sdma_v3_0_sw_init(void *handle)
}
sprintf(ring->name, "sdma%d", i);
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->sdma.trap_irq,
- (i == 0) ?
- AMDGPU_SDMA_IRQ_INSTANCE0 :
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
AMDGPU_SDMA_IRQ_INSTANCE1,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -1173,9 +1154,9 @@ static int sdma_v3_0_sw_init(void *handle)
return r;
}
-static int sdma_v3_0_sw_fini(void *handle)
+static int sdma_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -1185,10 +1166,10 @@ static int sdma_v3_0_sw_fini(void *handle)
return 0;
}
-static int sdma_v3_0_hw_init(void *handle)
+static int sdma_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v3_0_init_golden_registers(adev);
@@ -1199,9 +1180,9 @@ static int sdma_v3_0_hw_init(void *handle)
return r;
}
-static int sdma_v3_0_hw_fini(void *handle)
+static int sdma_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v3_0_ctx_switch_enable(adev, false);
sdma_v3_0_enable(adev, false);
@@ -1209,23 +1190,19 @@ static int sdma_v3_0_hw_fini(void *handle)
return 0;
}
-static int sdma_v3_0_suspend(void *handle)
+static int sdma_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v3_0_hw_fini(adev);
+ return sdma_v3_0_hw_fini(ip_block);
}
-static int sdma_v3_0_resume(void *handle)
+static int sdma_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v3_0_hw_init(adev);
+ return sdma_v3_0_hw_init(ip_block);
}
-static bool sdma_v3_0_is_idle(void *handle)
+static bool sdma_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1235,11 +1212,11 @@ static bool sdma_v3_0_is_idle(void *handle)
return true;
}
-static int sdma_v3_0_wait_for_idle(void *handle)
+static int sdma_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1252,9 +1229,9 @@ static int sdma_v3_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static bool sdma_v3_0_check_soft_reset(void *handle)
+static bool sdma_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS2);
@@ -1273,9 +1250,9 @@ static bool sdma_v3_0_check_soft_reset(void *handle)
}
}
-static int sdma_v3_0_pre_soft_reset(void *handle)
+static int sdma_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
if (!adev->sdma.srbm_soft_reset)
@@ -1292,9 +1269,9 @@ static int sdma_v3_0_pre_soft_reset(void *handle)
return 0;
}
-static int sdma_v3_0_post_soft_reset(void *handle)
+static int sdma_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
if (!adev->sdma.srbm_soft_reset)
@@ -1311,9 +1288,9 @@ static int sdma_v3_0_post_soft_reset(void *handle)
return 0;
}
-static int sdma_v3_0_soft_reset(void *handle)
+static int sdma_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp;
@@ -1508,10 +1485,10 @@ static void sdma_v3_0_update_sdma_medium_grain_light_sleep(
}
}
-static int sdma_v3_0_set_clockgating_state(void *handle,
+static int sdma_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1531,15 +1508,15 @@ static int sdma_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v3_0_set_powergating_state(void *handle,
+static int sdma_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1559,7 +1536,6 @@ static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags)
static const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
.name = "sdma_v3_0",
.early_init = sdma_v3_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v3_0_sw_init,
.sw_fini = sdma_v3_0_sw_fini,
.hw_init = sdma_v3_0_hw_init,
@@ -1582,6 +1558,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = false,
+ .secure_submission_supported = true,
.get_rptr = sdma_v3_0_ring_get_rptr,
.get_wptr = sdma_v3_0_ring_get_wptr,
.set_wptr = sdma_v3_0_ring_set_wptr,
@@ -1637,7 +1614,7 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: unused
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -1647,7 +1624,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index c8c22c1d1e65..f38004e6064e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -56,6 +56,7 @@
#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
#include "amdgpu_ras.h"
+#include "sdma_v4_4.h"
MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
@@ -69,6 +70,54 @@ MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin");
+
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL)
+};
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
@@ -142,7 +191,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};
@@ -259,6 +308,24 @@ static const struct soc15_reg_golden golden_settings_sdma_arct[] =
SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_UTCL1_TIMEOUT, 0xffffffff, 0x00010001)
};
+static const struct soc15_reg_golden golden_settings_sdma_aldebaran[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA2_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+};
+
static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
@@ -268,7 +335,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
};
@@ -449,8 +516,8 @@ static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
soc15_program_register_sequence(adev,
golden_settings_sdma_4,
ARRAY_SIZE(golden_settings_sdma_4));
@@ -458,7 +525,7 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_vg10,
ARRAY_SIZE(golden_settings_sdma_vg10));
break;
- case CHIP_VEGA12:
+ case IP_VERSION(4, 0, 1):
soc15_program_register_sequence(adev,
golden_settings_sdma_4,
ARRAY_SIZE(golden_settings_sdma_4));
@@ -466,7 +533,7 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_vg12,
ARRAY_SIZE(golden_settings_sdma_vg12));
break;
- case CHIP_VEGA20:
+ case IP_VERSION(4, 2, 0):
soc15_program_register_sequence(adev,
golden_settings_sdma0_4_2_init,
ARRAY_SIZE(golden_settings_sdma0_4_2_init));
@@ -477,12 +544,18 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma1_4_2,
ARRAY_SIZE(golden_settings_sdma1_4_2));
break;
- case CHIP_ARCTURUS:
+ case IP_VERSION(4, 2, 2):
soc15_program_register_sequence(adev,
golden_settings_sdma_arct,
ARRAY_SIZE(golden_settings_sdma_arct));
break;
- case CHIP_RAVEN:
+ case IP_VERSION(4, 4, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_aldebaran,
+ ARRAY_SIZE(golden_settings_sdma_aldebaran));
+ break;
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
soc15_program_register_sequence(adev,
golden_settings_sdma_4_1,
ARRAY_SIZE(golden_settings_sdma_4_1));
@@ -495,7 +568,7 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_rv1,
ARRAY_SIZE(golden_settings_sdma_rv1));
break;
- case CHIP_RENOIR:
+ case IP_VERSION(4, 1, 2):
soc15_program_register_sequence(adev,
golden_settings_sdma_4_3,
ARRAY_SIZE(golden_settings_sdma_4_3));
@@ -513,12 +586,12 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
* The only chips with SDMAv4 and ULV are VG10 and VG20.
* Server SKUs take a different hysteresis setting from other SKUs.
*/
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
if (adev->pdev->device == 0x6860)
break;
return;
- case CHIP_VEGA20:
+ case IP_VERSION(4, 2, 0):
if (adev->pdev->device == 0x66a1)
break;
return;
@@ -535,43 +608,6 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
}
}
-static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
-{
- int err = 0;
- const struct sdma_firmware_header_v1_0 *hdr;
-
- err = amdgpu_ucode_validate(sdma_inst->fw);
- if (err)
- return err;
-
- hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
- sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
- sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
-
- if (sdma_inst->feature_version >= 20)
- sdma_inst->burst_nop = true;
-
- return 0;
-}
-
-static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
-
- /* arcturus shares the same FW memory across
- all SDMA isntances */
- if (adev->asic_type == CHIP_ARCTURUS)
- break;
- }
-
- memset((void *)adev->sdma.instance, 0,
- sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
-}
-
/**
* sdma_v4_0_init_microcode - load ucode images from disk
*
@@ -586,96 +622,25 @@ static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
// vega10 real chip need to use PSP to load firmware
static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
- char fw_name[30];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
-
- DRM_DEBUG("\n");
+ int ret, i;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- chip_name = "vega10";
- break;
- case CHIP_VEGA12:
- chip_name = "vega12";
- break;
- case CHIP_VEGA20:
- chip_name = "vega20";
- break;
- case CHIP_RAVEN:
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- chip_name = "raven2";
- else if (adev->apu_flags & AMD_APU_IS_PICASSO)
- chip_name = "picasso";
- else
- chip_name = "raven";
- break;
- case CHIP_ARCTURUS:
- chip_name = "arcturus";
- break;
- case CHIP_RENOIR:
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- chip_name = "renoir";
- else
- chip_name = "green_sardine";
- break;
- default:
- BUG();
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
-
- err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
- if (err)
- goto out;
-
- for (i = 1; i < adev->sdma.num_instances; i++) {
- if (adev->asic_type == CHIP_ARCTURUS) {
- /* Acturus will leverage the same FW memory
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 0)) {
+ /* Acturus & Aldebaran will leverage the same FW memory
for every SDMA instance */
- memcpy((void *)&adev->sdma.instance[i],
- (void *)&adev->sdma.instance[0],
- sizeof(struct amdgpu_sdma_instance));
- }
- else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
-
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
- if (err)
- goto out;
- }
- }
-
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ ret = amdgpu_sdma_init_microcode(adev, 0, true);
+ break;
+ } else {
+ ret = amdgpu_sdma_init_microcode(adev, i, false);
+ if (ret)
+ return ret;
}
}
-out:
- if (err) {
- DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
- sdma_v4_0_destroy_inst_ctx(adev);
- }
- return err;
+ return ret;
}
/**
@@ -690,7 +655,7 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
- rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+ rptr = ((u64 *)ring->rptr_cpu_addr);
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@@ -710,7 +675,7 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
@@ -724,7 +689,7 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
}
/**
- * sdma_v4_0_page_ring_set_wptr - commit the write pointer
+ * sdma_v4_0_ring_set_wptr - commit the write pointer
*
* @ring: amdgpu ring pointer
*
@@ -736,12 +701,12 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("Setting write pointer\n");
if (ring->use_doorbell) {
- u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+ u64 *wb = (u64 *)ring->wptr_cpu_addr;
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
@@ -779,7 +744,7 @@ static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
} else {
wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
wptr = wptr << 32;
@@ -790,7 +755,7 @@ static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
}
/**
- * sdma_v4_0_ring_set_wptr - commit the write pointer
+ * sdma_v4_0_page_ring_set_wptr - commit the write pointer
*
* @ring: amdgpu ring pointer
*
@@ -801,7 +766,7 @@ static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+ u64 *wb = (u64 *)ring->wptr_cpu_addr;
/* XXX check if swapping is necessary on BE */
WRITE_ONCE(*wb, (ring->wptr << 2));
@@ -948,31 +913,23 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
/**
- * sdma_v4_0_gfx_stop - stop the gfx async dma engines
+ * sdma_v4_0_gfx_enable - enable the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
- * Stop the gfx async dma ring buffers (VEGA10).
+ * @enable: enable SDMA RB/IB
+ * control the gfx async dma ring buffers (VEGA10).
*/
-static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
{
- struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
- int i, unset = 0;
+ int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sdma[i] = &adev->sdma.instance[i].ring;
-
- if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- unset = 1;
- }
-
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
}
}
@@ -998,20 +955,10 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
*/
static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
- bool unset = false;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sdma[i] = &adev->sdma.instance[i].page;
-
- if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
- (!unset)) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- unset = true;
- }
-
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RB_ENABLE, 0);
@@ -1076,9 +1023,12 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
* Arcturus for the moment and firmware version 14
* and above.
*/
- if (adev->asic_type == CHIP_ARCTURUS &&
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
adev->sdma.instance[i].fw_version >= 14)
WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
+ /* Extend page fault timeout to avoid interrupt storm */
+ WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080);
}
}
@@ -1097,7 +1047,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
int i;
if (!enable) {
- sdma_v4_0_gfx_stop(adev);
+ sdma_v4_0_gfx_enable(adev, enable);
sdma_v4_0_rlc_stop(adev);
if (adev->sdma.has_page_queue)
sdma_v4_0_page_stop(adev);
@@ -1140,13 +1090,10 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
- u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
- wb_offset = (ring->rptr_offs * 4);
-
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
@@ -1159,9 +1106,9 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
RPTR_WRITEBACK_ENABLE, 1);
@@ -1191,7 +1138,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
/* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
@@ -1213,8 +1160,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
#endif
/* enable DMA IBs */
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
-
- ring->sched.ready = true;
}
/**
@@ -1230,13 +1175,10 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
- u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
- wb_offset = (ring->rptr_offs * 4);
-
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
@@ -1249,9 +1191,9 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RPTR_WRITEBACK_ENABLE, 1);
@@ -1282,7 +1224,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
/* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
@@ -1304,8 +1246,6 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
#endif
/* enable DMA IBs */
WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
-
- ring->sched.ready = true;
}
static void
@@ -1361,9 +1301,10 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
return;
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
sdma_v4_1_init_power_gating(adev);
sdma_v4_1_update_power_gating(adev, true);
break;
@@ -1504,13 +1445,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(page);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == page)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return r;
@@ -1630,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1803,39 +1738,31 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
{
uint fw_version = adev->sdma.instance[0].fw_version;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
return fw_version >= 430;
- case CHIP_VEGA12:
+ case IP_VERSION(4, 0, 1):
/*return fw_version >= 31;*/
return false;
- case CHIP_VEGA20:
+ case IP_VERSION(4, 2, 0):
return fw_version >= 123;
default:
return false;
}
}
-static int sdma_v4_0_early_init(void *handle)
+static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- if (adev->flags & AMD_IS_APU)
- adev->sdma.num_instances = 1;
- else if (adev->asic_type == CHIP_ARCTURUS)
- adev->sdma.num_instances = 8;
- else
- adev->sdma.num_instances = 2;
-
r = sdma_v4_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
+ if (r)
return r;
- }
/* TODO: Page queue breaks driver reload under SRIOV */
- if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev)))
+ if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 0, 0)) &&
+ amdgpu_sriov_vf((adev)))
adev->sdma.has_page_queue = false;
else if (sdma_v4_0_fw_support_paging_queue(adev))
adev->sdma.has_page_queue = true;
@@ -1853,29 +1780,25 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
-static int sdma_v4_0_late_init(void *handle)
+static int sdma_v4_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_ih_if ih_info = {
- .cb = sdma_v4_0_process_ras_data_cb,
- };
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v4_0_setup_ulv(adev);
- if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
- adev->sdma.funcs->reset_ras_error_count(adev);
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
- if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
- return adev->sdma.funcs->ras_late_init(adev, &ih_info);
- else
- return 0;
+ return 0;
}
-static int sdma_v4_0_sw_init(void *handle)
+static int sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+ uint32_t *ptr;
/* SDMA trap event */
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1895,6 +1818,33 @@ static int sdma_v4_0_sw_init(void *handle)
return r;
}
+ /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_VM_HOLE,
+ &adev->sdma.vm_hole_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
+ &adev->sdma.doorbell_invalid_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
+ &adev->sdma.pool_timeout_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
+ &adev->sdma.srbm_write_irq);
+ if (r)
+ return r;
+ }
+
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -1906,10 +1856,21 @@ static int sdma_v4_0_sw_init(void *handle)
/* doorbell size is 2 dwords, get DWORD offset */
ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
+ /*
+ * On Arcturus, SDMA instance 5~7 has a different vmhub
+ * type(AMDGPU_MMHUB1).
+ */
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
+ i >= 5)
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
@@ -1921,96 +1882,142 @@ static int sdma_v4_0_sw_init(void *handle)
/* paging queue use same doorbell index/routing as gfx queue
* with 0x400 (4096 dwords) offset on second doorbell page
*/
- ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
- ring->doorbell_index += 0x400;
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(4, 0, 0) &&
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) <
+ IP_VERSION(4, 2, 0)) {
+ ring->doorbell_index =
+ adev->doorbell_index.sdma_engine[i] << 1;
+ ring->doorbell_index += 0x400;
+ } else {
+ /* From vega20, the sdma_doorbell_range in 1st
+ * doorbell page is reserved for page queue.
+ */
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] + 1) << 1;
+ }
+
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
+ i >= 5)
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "page%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
}
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
+ return -EINVAL;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
return r;
}
-static int sdma_v4_0_sw_fini(void *handle)
+static int sdma_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
- adev->sdma.funcs->ras_fini(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
if (adev->sdma.has_page_queue)
amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
- sdma_v4_0_destroy_inst_ctx(adev);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 2, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 0))
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+ else
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
+
+ kfree(adev->sdma.ip_dump);
return 0;
}
-static int sdma_v4_0_hw_init(void *handle)
+static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0);
if (!amdgpu_sriov_vf(adev))
sdma_v4_0_init_golden_registers(adev);
- r = sdma_v4_0_start(adev);
-
- return r;
+ return sdma_v4_0_start(adev);
}
-static int sdma_v4_0_hw_fini(void *handle)
+static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
if (amdgpu_sriov_vf(adev))
return 0;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
- AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
}
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0);
return 0;
}
-static int sdma_v4_0_suspend(void *handle)
+static int sdma_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- return sdma_v4_0_hw_fini(adev);
+ /* SMU saves SDMA state for us */
+ if (adev->in_s0ix) {
+ sdma_v4_0_gfx_enable(adev, false);
+ return 0;
+ }
+
+ return sdma_v4_0_hw_fini(ip_block);
}
-static int sdma_v4_0_resume(void *handle)
+static int sdma_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- return sdma_v4_0_hw_init(adev);
+ /* SMU restores SDMA state for us */
+ if (adev->in_s0ix) {
+ sdma_v4_0_enable(adev, true);
+ sdma_v4_0_gfx_enable(adev, true);
+ return 0;
+ }
+
+ return sdma_v4_0_hw_init(ip_block);
}
-static bool sdma_v4_0_is_idle(void *handle)
+static bool sdma_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -2023,11 +2030,11 @@ static bool sdma_v4_0_is_idle(void *handle)
return true;
}
-static int sdma_v4_0_wait_for_idle(void *handle)
+static int sdma_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i, j;
u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
for (j = 0; j < adev->sdma.num_instances; j++) {
@@ -2042,7 +2049,7 @@ static int sdma_v4_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v4_0_soft_reset(void *handle)
+static int sdma_v4_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -2068,23 +2075,28 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t instance;
+ int instance;
DRM_DEBUG("IH: SDMA trap\n");
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ return instance;
+
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[instance].ring);
break;
case 1:
- if (adev->asic_type == CHIP_VEGA20)
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 0))
amdgpu_fence_process(&adev->sdma.instance[instance].page);
break;
case 2:
/* XXX compute */
break;
case 3:
- if (adev->asic_type != CHIP_VEGA20)
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) !=
+ IP_VERSION(4, 2, 0))
amdgpu_fence_process(&adev->sdma.instance[instance].page);
break;
}
@@ -2149,6 +2161,77 @@ static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
return 0;
}
+static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+ struct amdgpu_task_info *task_info;
+ u64 addr;
+
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0 || instance >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance invalid %d\n", instance);
+ return -EINVAL;
+ }
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ dev_dbg_ratelimited(adev->dev,
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ dev_dbg_ratelimited(adev->dev,
+ " for process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ return 0;
+}
+
+static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev,
+ "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev,
+ "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
static void sdma_v4_0_update_medium_grain_clock_gating(
struct amdgpu_device *adev,
bool enable)
@@ -2214,42 +2297,32 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
}
}
-static int sdma_v4_0_set_clockgating_state(void *handle,
+static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- sdma_v4_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE);
- sdma_v4_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE);
- break;
- default:
- break;
- }
+ sdma_v4_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ sdma_v4_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
return 0;
}
-static int sdma_v4_0_set_powergating_state(void *handle,
+static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
sdma_v4_1_update_power_gating(adev,
- state == AMD_PG_STATE_GATE ? true : false);
+ state == AMD_PG_STATE_GATE);
break;
default:
break;
@@ -2258,9 +2331,9 @@ static int sdma_v4_0_set_powergating_state(void *handle,
return 0;
}
-static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v4_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -2277,6 +2350,46 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
+static void sdma_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v4_0_get_reg_offset(adev, i,
+ sdma_reg_list_4_0[j].reg_offset));
+ }
+}
+
const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.name = "sdma_v4_0",
.early_init = sdma_v4_0_early_init,
@@ -2293,50 +2406,16 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.set_clockgating_state = sdma_v4_0_set_clockgating_state,
.set_powergating_state = sdma_v4_0_set_powergating_state,
.get_clockgating_state = sdma_v4_0_get_clockgating_state,
+ .dump_ip_state = sdma_v4_0_dump_ip_state,
+ .print_ip_state = sdma_v4_0_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
+ .align_mask = 0xff,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB_0,
- .get_rptr = sdma_v4_0_ring_get_rptr,
- .get_wptr = sdma_v4_0_ring_get_wptr,
- .set_wptr = sdma_v4_0_ring_set_wptr,
- .emit_frame_size =
- 6 + /* sdma_v4_0_ring_emit_hdp_flush */
- 3 + /* hdp invalidate */
- 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
- /* sdma_v4_0_ring_emit_vm_flush */
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
- 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
- .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
- .emit_ib = sdma_v4_0_ring_emit_ib,
- .emit_fence = sdma_v4_0_ring_emit_fence,
- .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
- .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
- .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
- .test_ring = sdma_v4_0_ring_test_ring,
- .test_ib = sdma_v4_0_ring_test_ib,
- .insert_nop = sdma_v4_0_ring_insert_nop,
- .pad_ib = sdma_v4_0_ring_pad_ib,
- .emit_wreg = sdma_v4_0_ring_emit_wreg,
- .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
-/*
- * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1).
- * So create a individual constant ring_funcs for those instances.
- */
-static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
- .type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
- .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
- .support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB_1,
+ .secure_submission_supported = true,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_ring_get_wptr,
.set_wptr = sdma_v4_0_ring_set_wptr,
@@ -2365,42 +2444,10 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
+ .align_mask = 0xff,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB_0,
- .get_rptr = sdma_v4_0_ring_get_rptr,
- .get_wptr = sdma_v4_0_page_ring_get_wptr,
- .set_wptr = sdma_v4_0_page_ring_set_wptr,
- .emit_frame_size =
- 6 + /* sdma_v4_0_ring_emit_hdp_flush */
- 3 + /* hdp invalidate */
- 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
- /* sdma_v4_0_ring_emit_vm_flush */
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
- 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
- .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
- .emit_ib = sdma_v4_0_ring_emit_ib,
- .emit_fence = sdma_v4_0_ring_emit_fence,
- .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
- .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
- .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
- .test_ring = sdma_v4_0_ring_test_ring,
- .test_ib = sdma_v4_0_ring_test_ib,
- .insert_nop = sdma_v4_0_ring_insert_nop,
- .pad_ib = sdma_v4_0_ring_pad_ib,
- .emit_wreg = sdma_v4_0_ring_emit_wreg,
- .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
-static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
- .type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
- .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
- .support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB_1,
+ .secure_submission_supported = true,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_page_ring_get_wptr,
.set_wptr = sdma_v4_0_page_ring_set_wptr,
@@ -2432,19 +2479,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
- adev->sdma.instance[i].ring.funcs =
- &sdma_v4_0_ring_funcs_2nd_mmhub;
- else
- adev->sdma.instance[i].ring.funcs =
- &sdma_v4_0_ring_funcs;
+ adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
adev->sdma.instance[i].ring.me = i;
if (adev->sdma.has_page_queue) {
- if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
- adev->sdma.instance[i].page.funcs =
- &sdma_v4_0_page_ring_funcs_2nd_mmhub;
- else
- adev->sdma.instance[i].page.funcs =
+ adev->sdma.instance[i].page.funcs =
&sdma_v4_0_page_ring_funcs;
adev->sdma.instance[i].page.me = i;
}
@@ -2465,28 +2503,45 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
.process = amdgpu_sdma_process_ecc_irq,
};
+static const struct amdgpu_irq_src_funcs sdma_v4_0_vm_hole_irq_funcs = {
+ .process = sdma_v4_0_process_vm_hole_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_doorbell_invalid_irq_funcs = {
+ .process = sdma_v4_0_process_doorbell_invalid_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_pool_timeout_irq_funcs = {
+ .process = sdma_v4_0_process_pool_timeout_irq,
+};
+static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {
+ .process = sdma_v4_0_process_srbm_write_irq,
+};
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
+ adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
+ /*For Arcturus and Aldebaran, add another 4 irq handler*/
switch (adev->sdma.num_instances) {
- case 1:
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
- break;
+ case 5:
case 8:
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
break;
- case 2:
default:
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
break;
}
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
+ adev->sdma.vm_hole_irq.funcs = &sdma_v4_0_vm_hole_irq_funcs;
+ adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_0_doorbell_invalid_irq_funcs;
+ adev->sdma.pool_timeout_irq.funcs = &sdma_v4_0_pool_timeout_irq_funcs;
+ adev->sdma.srbm_write_irq.funcs = &sdma_v4_0_srbm_write_irq_funcs;
}
/**
@@ -2496,7 +2551,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine (VEGA10/12).
* Used by the amdgpu ttm implementation to move pages if
@@ -2506,11 +2561,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
@@ -2608,7 +2663,7 @@ static void sdma_v4_0_get_ras_error_count(uint32_t value,
}
}
-static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev,
uint32_t instance, void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -2630,6 +2685,18 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
return 0;
};
+static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+ int i = 0;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
+ dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
+ return;
+ }
+ }
+}
+
static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
int i;
@@ -2641,19 +2708,27 @@ static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
}
}
-static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
- .ras_late_init = amdgpu_sdma_ras_late_init,
- .ras_fini = amdgpu_sdma_ras_fini,
+const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = {
.query_ras_error_count = sdma_v4_0_query_ras_error_count,
.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
};
+static struct amdgpu_sdma_ras sdma_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_0_ras_hw_ops,
+ .ras_cb = sdma_v4_0_process_ras_data_cb,
+ },
+};
+
static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 2):
+ adev->sdma.ras = &sdma_v4_0_ras;
+ break;
+ case IP_VERSION(4, 4, 0):
+ adev->sdma.ras = &sdma_v4_4_ras;
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
new file mode 100644
index 000000000000..0ddb6955a6d3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "sdma/sdma_4_4_0_offset.h"
+#include "sdma/sdma_4_4_0_sh_mask.h"
+#include "soc15.h"
+#include "amdgpu_ras.h"
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA2_REG_OFFSET 0x1cda0
+#define SDMA3_REG_OFFSET 0x1d1a0
+#define SDMA4_REG_OFFSET 0x1d5a0
+
+/* helper function that allow only use sdma0 register offset
+ * to calculate register offset for all the sdma instances */
+static uint32_t sdma_v4_4_get_reg_offset(struct amdgpu_device *adev,
+ uint32_t instance,
+ uint32_t offset)
+{
+ uint32_t sdma_base = adev->reg_offset[SDMA0_HWIP][0][0];
+
+ switch (instance) {
+ case 0:
+ return (sdma_base + offset);
+ case 1:
+ return (sdma_base + SDMA1_REG_OFFSET + offset);
+ case 2:
+ return (sdma_base + SDMA2_REG_OFFSET + offset);
+ case 3:
+ return (sdma_base + SDMA3_REG_OFFSET + offset);
+ case 4:
+ return (sdma_base + SDMA4_REG_OFFSET + offset);
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = {
+ { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
+ 0, 0,
+ },
+ { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UCODE_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_RB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_IB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RD_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_WR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_WR_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_SPLIT_DATA_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_SPLIT_DATA_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_MC_RDRET_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
+ 0, 0,
+ },
+};
+
+static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
+ uint32_t reg_offset,
+ uint32_t value,
+ uint32_t instance,
+ uint32_t *sec_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt;
+
+ /* double bits error (multiple bits) error detection is not supported */
+ for (i = 0; i < ARRAY_SIZE(sdma_v4_4_ras_fields); i++) {
+ if (sdma_v4_4_ras_fields[i].reg_offset != reg_offset)
+ continue;
+
+ /* the SDMA_EDC_COUNTER register in each sdma instance
+ * shares the same sed shift_mask
+ * */
+ sec_cnt = (value &
+ sdma_v4_4_ras_fields[i].sec_count_mask) >>
+ sdma_v4_4_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ dev_info(adev->dev, "Detected %s in SDMA%d, SED %d\n",
+ sdma_v4_4_ras_fields[i].name,
+ instance, sec_cnt);
+ *sec_count += sec_cnt;
+ }
+ }
+}
+
+static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
+ uint32_t instance,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0;
+ uint32_t reg_value = 0;
+ uint32_t reg_offset = 0;
+
+ reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER);
+ reg_value = RREG32(reg_offset);
+ /* double bit error is not supported */
+ if (reg_value)
+ sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER, reg_value,
+ instance, &sec_count);
+
+ reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER2);
+ reg_value = RREG32(reg_offset);
+ /* double bit error is not supported */
+ if (reg_value)
+ sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER2, reg_value,
+ instance, &sec_count);
+
+ /*
+ * err_data->ue_count should be initialized to 0
+ * before calling into this function
+ *
+ * SDMA RAS supports single bit uncorrectable error detection.
+ * So, increment uncorrectable error count.
+ */
+ err_data->ue_count += sec_count;
+
+ /*
+ * SDMA RAS does not support correctable errors.
+ * Set ce count to 0.
+ */
+ err_data->ce_count = 0;
+
+ return 0;
+};
+
+static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_offset;
+
+ /* write 0 to EDC_COUNTER reg to clear sdma edc counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER);
+ WREG32(reg_offset, 0);
+ reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER2);
+ WREG32(reg_offset, 0);
+ }
+ }
+}
+
+static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+ int i = 0;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
+ dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
+ return;
+ }
+ }
+
+}
+
+const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
+ .query_ras_error_count = sdma_v4_4_query_ras_error_count,
+ .reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
+};
+
+struct amdgpu_sdma_ras sdma_v4_4_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_4_ras_hw_ops,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
new file mode 100644
index 000000000000..a9f0c68359e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SDMA_V4_4_H__
+#define __SDMA_V4_4_H__
+
+extern struct amdgpu_sdma_ras sdma_v4_4_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
new file mode 100644
index 000000000000..a1443990d5c6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -0,0 +1,2613 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "vega10_sdma_pkt_open.h"
+
+#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+
+#include "amdgpu_ras.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin");
+MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin");
+
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_VM_CNTL)
+};
+
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400
+
+#define WREG32_SDMA(instance, offset, value) \
+ WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value)
+#define RREG32_SDMA(instance, offset) \
+ RREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)))
+
+static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
+ u32 instance_id);
+
+static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
+ u32 instance, u32 offset)
+{
+ u32 dev_inst = GET_INST(SDMA0, instance);
+
+ return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset);
+}
+
+static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
+{
+ switch (seq_num) {
+ case 0:
+ return SOC15_IH_CLIENTID_SDMA0;
+ case 1:
+ return SOC15_IH_CLIENTID_SDMA1;
+ case 2:
+ return SOC15_IH_CLIENTID_SDMA2;
+ case 3:
+ return SOC15_IH_CLIENTID_SDMA3;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id)
+{
+ switch (client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ return 0;
+ case SOC15_IH_CLIENTID_SDMA1:
+ return 1;
+ case SOC15_IH_CLIENTID_SDMA2:
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ return 0;
+ else
+ return 2;
+ case SOC15_IH_CLIENTID_SDMA3:
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ return 1;
+ else
+ return 3;
+ default:
+ return -EINVAL;
+ }
+}
+
+static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ u32 val;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG, NUM_BANKS, 4);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE, 0);
+ WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG, val);
+
+ val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ, NUM_BANKS,
+ 4);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ,
+ PIPE_INTERLEAVE_SIZE, 0);
+ WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ, val);
+ }
+}
+
+/**
+ * sdma_v4_4_2_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
+{
+ int ret, i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ ret = amdgpu_sdma_init_microcode(adev, 0, true);
+ break;
+ } else {
+ ret = amdgpu_sdma_init_microcode(adev, i, false);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * sdma_v4_4_2_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware.
+ */
+static uint64_t sdma_v4_4_2_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = READ_ONCE(*((u64 *)&ring->adev->wb.wb[ring->rptr_offs]));
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", rptr);
+ return rptr >> 2;
+}
+
+/**
+ * sdma_v4_4_2_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware.
+ */
+static uint64_t sdma_v4_4_2_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ } else {
+ wptr = RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI);
+ wptr = wptr << 32;
+ wptr |= RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR);
+ DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
+ ring->me, wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v4_4_2_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware.
+ */
+static void sdma_v4_4_2_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+ if (ring->use_doorbell) {
+ u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ WRITE_ONCE(*wb, (ring->wptr << 2));
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR,
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI,
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+/**
+ * sdma_v4_4_2_page_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware.
+ */
+static uint64_t sdma_v4_4_2_page_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ } else {
+ wptr = RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI);
+ wptr = wptr << 32;
+ wptr |= RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v4_4_2_page_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware.
+ */
+static void sdma_v4_4_2_page_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+
+ /* XXX check if swapping is necessary on BE */
+ WRITE_ONCE(*wb, (ring->wptr << 2));
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ uint64_t wptr = ring->wptr << 2;
+
+ WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR,
+ lower_32_bits(wptr));
+ WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI,
+ upper_32_bits(wptr));
+ }
+}
+
+static void sdma_v4_4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring.
+ */
+static void sdma_v4_4_2_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ /* IB packet must end on a 8 DW boundary */
+ sdma_v4_4_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+
+}
+
+static void sdma_v4_4_2_wait_reg_mem(struct amdgpu_ring *ring,
+ int mem_space, int hdp,
+ uint32_t addr0, uint32_t addr1,
+ uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ if (mem_space) {
+ /* memory */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ } else {
+ /* registers */
+ amdgpu_ring_write(ring, addr0 << 2);
+ amdgpu_ring_write(ring, addr1 << 2);
+ }
+ amdgpu_ring_write(ring, ref); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
+ << (ring->me % adev->sdma.num_inst_per_aid);
+
+ sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ ref_and_mask, ref_and_mask, 10);
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed.
+ */
+static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+}
+
+
+/**
+ * sdma_v4_4_2_inst_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Stop the gfx async dma ring buffers.
+ */
+static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ u32 doorbell_offset, doorbell;
+ u32 rb_cntl, ib_cntl, sdma_cntl;
+ int i;
+
+ for_each_inst(i, inst_mask) {
+ sdma[i] = &adev->sdma.instance[i].ring;
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+ sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl);
+
+ if (sdma[i]->use_doorbell) {
+ doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE, 0);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_GFX_DOORBELL_OFFSET,
+ OFFSET, 0);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset);
+ }
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Stop the compute async dma queues.
+ */
+static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v4_4_2_inst_page_stop - stop the page async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Stop the page async dma ring buffers.
+ */
+static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ for_each_inst(i, inst_mask) {
+ rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
+ RB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL,
+ IB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Halt or unhalt the async dma engines context switch.
+ */
+static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev,
+ bool enable, uint32_t inst_mask)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for_each_inst(i, inst_mask) {
+ f32_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32_SDMA(i, regSDMA_PHASE0_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, regSDMA_PHASE1_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, regSDMA_PHASE2_QUANTUM, phase_quantum);
+ }
+ WREG32_SDMA(i, regSDMA_CNTL, f32_cntl);
+
+ /* Extend page fault timeout to avoid interrupt storm */
+ WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080);
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Halt or unhalt the async dma engines.
+ */
+static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable,
+ uint32_t inst_mask)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+ sdma_v4_4_2_inst_rlc_stop(adev, inst_mask);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+ /* SDMA FW needs to respond to FREEZE requests during reset.
+ * Keep it running during reset */
+ if (!amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+ return;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ return;
+
+ for_each_inst(i, inst_mask) {
+ f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl);
+ }
+}
+
+/*
+ * sdma_v4_4_2_rb_cntl - get parameters for rb_cntl
+ */
+static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
+{
+ /* Set ring buffer size in dwords */
+ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
+
+ barrier(); /* work around https://llvm.org/pr42576 */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ return rb_cntl;
+}
+
+/**
+ * sdma_v4_4_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ * @restore: used to restore wptr when restart
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
+ u64 rwptr;
+
+ wb_offset = (ring->rptr_offs * 4);
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
+ rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI,
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_LO,
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
+
+ /* For the guilty queue, set RPTR to the current wptr to skip bad commands,
+ * It is not a guilty queue, restore cache_rptr and continue execution.
+ */
+ if (adev->sdma.instance[i].gfx_guilty)
+ rwptr = ring->wptr;
+ else
+ rwptr = ring->cached_rptr;
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+ }
+
+ doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset);
+
+ sdma_v4_4_2_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+}
+
+/**
+ * sdma_v4_4_2_page_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ * @restore: boolean to say restore needed or not
+ *
+ * Set up the page DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
+ u64 rwptr;
+
+ wb_offset = (ring->rptr_offs * 4);
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
+ rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+
+ /* For the guilty queue, set RPTR to the current wptr to skip bad commands,
+ * It is not a guilty queue, restore cache_rptr and continue execution.
+ */
+ if (adev->sdma.instance[i].page_guilty)
+ rwptr = ring->wptr;
+ else
+ rwptr = ring->cached_rptr;
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+ }
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_LO,
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);
+
+ doorbell = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_PAGE_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_PAGE_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, regSDMA_PAGE_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET, doorbell_offset);
+
+ /* paging queue doorbell range is setup at sdma_v4_4_2_gfx_resume */
+ sdma_v4_4_2_page_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA_PAGE_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
+}
+
+static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev)
+{
+
+}
+
+/**
+ * sdma_v4_4_2_inst_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Set up the compute DMA queues and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ sdma_v4_4_2_init_pg(adev);
+
+ return 0;
+}
+
+/**
+ * sdma_v4_4_2_inst_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ for_each_inst(i, inst_mask) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32_SDMA(i, regSDMA_UCODE_ADDR, 0);
+
+ for (j = 0; j < fw_size; j++)
+ WREG32_SDMA(i, regSDMA_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SDMA(i, regSDMA_UCODE_ADDR,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v4_4_2_inst_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ * @restore: boolean to say restore needed or not
+ *
+ * Set up the DMA engines and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
+ uint32_t inst_mask, bool restore)
+{
+ struct amdgpu_ring *ring;
+ uint32_t tmp_mask;
+ int i, r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+ } else {
+ /* bypass sdma microcode loading on Gopher */
+ if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
+ adev->sdma.instance[0].fw) {
+ r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
+ if (r)
+ return r;
+ }
+
+ /* unhalt the MEs */
+ sdma_v4_4_2_inst_enable(adev, true, inst_mask);
+ /* enable sdma ring preemption */
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
+ }
+
+ /* start the gfx rings and rlc compute queues */
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ uint32_t temp;
+
+ WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+ sdma_v4_4_2_gfx_resume(adev, i, restore);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_page_resume(adev, i, restore);
+
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32_SDMA(i, regSDMA_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
+
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
+ /* enable context empty interrupt during initialization */
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
+ }
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ /* unhalt engine */
+ temp = RREG32_SDMA(i, regSDMA_F32_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA_F32_CNTL, HALT, 0);
+ WREG32_SDMA(i, regSDMA_F32_CNTL, temp);
+ }
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, true, inst_mask);
+ } else {
+ r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask);
+ if (r)
+ return r;
+ }
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ ring = &adev->sdma.instance[i].ring;
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ struct amdgpu_ring *page = &adev->sdma.instance[i].page;
+
+ r = amdgpu_ring_test_helper(page);
+ if (r)
+ return r;
+ }
+ }
+
+ return r;
+}
+
+/**
+ * sdma_v4_4_2_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_4_2_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r)
+ goto error_free_wb;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * sdma_v4_4_2_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err0;
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v4_4_2_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA.
+ */
+static void sdma_v4_4_2_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v4_4_2_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA.
+ */
+static void sdma_v4_4_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v4_4_2_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA.
+ */
+static void sdma_v4_4_2_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v4_4_2_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ */
+static void sdma_v4_4_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+
+/**
+ * sdma_v4_4_2_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ sdma_v4_4_2_wait_reg_mem(ring, 1, 0,
+ addr & 0xfffffffc,
+ upper_32_bits(addr) & 0xffffffff,
+ seq, 0xffffffff, 4);
+}
+
+
+/**
+ * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA.
+ */
+static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v4_4_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ sdma_v4_4_2_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
+}
+
+static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ return false;
+ default:
+ return false;
+ }
+}
+
+static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v4_4_2_stop_queue,
+ .start_kernel_queue = &sdma_v4_4_2_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v4_4_2_soft_reset_engine,
+};
+
+static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = sdma_v4_4_2_init_microcode(adev);
+ if (r)
+ return r;
+
+ /* TODO: Page queue breaks driver reload under SRIOV */
+ if (sdma_v4_4_2_fw_support_paging_queue(adev))
+ adev->sdma.has_page_queue = true;
+
+ sdma_v4_4_2_set_ring_funcs(adev);
+ sdma_v4_4_2_set_buffer_funcs(adev);
+ sdma_v4_4_2_set_vm_pte_funcs(adev);
+ sdma_v4_4_2_set_irq_funcs(adev);
+ sdma_v4_4_2_set_ras_funcs(adev);
+ return 0;
+}
+
+#if 0
+static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+#endif
+
+static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+#if 0
+ struct ras_ih_if ih_info = {
+ .cb = sdma_v4_4_2_process_ras_data_cb,
+ };
+#endif
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
+
+ /* The initialization is done in the late_init stage to ensure that the SMU
+ * initialization and capability setup are completed before we check the SDMA
+ * reset capability
+ */
+ sdma_v4_4_2_update_reset_mask(adev);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 aid_id;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
+ uint32_t *ptr;
+
+ /* SDMA trap event */
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+ }
+
+ /* SDMA SRAM ECC event */
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
+ &adev->sdma.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_VM_HOLE,
+ &adev->sdma.vm_hole_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
+ &adev->sdma.doorbell_invalid_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
+ &adev->sdma.pool_timeout_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
+ &adev->sdma.srbm_write_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_CTXEMPTY,
+ &adev->sdma.ctxt_empty_irq);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ /* Initialize guilty flags for GFX and PAGE queues */
+ adev->sdma.instance[i].gfx_guilty = false;
+ adev->sdma.instance[i].page_guilty = false;
+ adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs;
+
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ aid_id = adev->sdma.instance[i].aid_id;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ /* doorbell size is 2 dwords, get DWORD offset */
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(aid_id);
+
+ sprintf(ring->name, "sdma%d.%d", aid_id,
+ i % adev->sdma.num_inst_per_aid);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ ring = &adev->sdma.instance[i].page;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ /* doorbell index of page queue is assigned right after
+ * gfx queue on the same instance
+ */
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] + 1) << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(aid_id);
+
+ sprintf(ring->name, "page%d.%d", aid_id,
+ i % adev->sdma.num_inst_per_aid);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+ }
+
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "fail to initialize sdma ras block\n");
+ return -EINVAL;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ if (adev->sdma.has_page_queue)
+ amdgpu_ring_fini(&adev->sdma.instance[i].page);
+ }
+
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5))
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+ else
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
+
+ kfree(adev->sdma.ip_dump);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t inst_mask;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (!amdgpu_sriov_vf(adev))
+ sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
+
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
+
+ return r;
+}
+
+static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state);
+
+static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_in_reset(adev))
+ sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
+
+ return sdma_v4_4_2_hw_fini(ip_block);
+}
+
+static int sdma_v4_4_2_resume(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v4_4_2_hw_init(ip_block);
+}
+
+static bool sdma_v4_4_2_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32_SDMA(i, regSDMA_STATUS_REG);
+
+ if (!(tmp & SDMA_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v4_4_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i, j;
+ u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ for (j = 0; j < adev->sdma.num_instances; j++) {
+ sdma[j] = RREG32_SDMA(j, regSDMA_STATUS_REG);
+ if (!(sdma[j] & SDMA_STATUS_REG__IDLE_MASK))
+ break;
+ }
+ if (j == adev->sdma.num_instances)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+
+ return 0;
+}
+
+static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t instance_id, bool is_page_queue)
+{
+ uint32_t reg_offset = is_page_queue ? regSDMA_PAGE_CONTEXT_STATUS : regSDMA_GFX_CONTEXT_STATUS;
+ uint32_t context_status = RREG32(sdma_v4_4_2_get_reg_offset(adev, instance_id, reg_offset));
+
+ /* Check if the SELECTED bit is set */
+ return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0;
+}
+
+static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 id = ring->me;
+ int r;
+
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, id, false);
+ amdgpu_amdkfd_resume(adev, true);
+ return r;
+}
+
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 instance_id = ring->me;
+ u32 inst_mask;
+ uint64_t rptr;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ /* Check if this queue is the guilty one */
+ adev->sdma.instance[instance_id].gfx_guilty =
+ sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
+ if (adev->sdma.has_page_queue)
+ adev->sdma.instance[instance_id].page_guilty =
+ sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
+
+ /* Cache the rptr before reset, after the reset,
+ * all of the registers will be reset to 0
+ */
+ rptr = amdgpu_ring_get_rptr(ring);
+ ring->cached_rptr = rptr;
+ /* Cache the rptr for the page queue if it exists */
+ if (adev->sdma.has_page_queue) {
+ struct amdgpu_ring *page_ring = &adev->sdma.instance[instance_id].page;
+ rptr = amdgpu_ring_get_rptr(page_ring);
+ page_ring->cached_rptr = rptr;
+ }
+
+ /* stop queue */
+ inst_mask = 1 << ring->me;
+ sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_mask;
+ int i, r;
+
+ inst_mask = 1 << ring->me;
+ udelay(50);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT))
+ break;
+ udelay(1);
+ }
+
+ if (i == adev->usec_timeout) {
+ dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n",
+ ring->me);
+ return -ETIMEDOUT;
+ }
+
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, true);
+
+ return r;
+}
+
+static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
+ u32 instance_id)
+{
+ /* For SDMA 4.x, use the existing DPM interface for backward compatibility
+ * we need to convert the logical instance ID to physical instance ID before reset.
+ */
+ return amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id));
+}
+
+static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t instance, i;
+
+ DRM_DEBUG("IH: SDMA trap\n");
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
+
+ /* Client id gives the SDMA instance in AID. To know the exact SDMA
+ * instance, interrupt entry gives the node id which corresponds to the AID instance.
+ * Match node id with the AID id associated with the SDMA instance. */
+ for (i = instance; i < adev->sdma.num_instances;
+ i += adev->sdma.num_inst_per_aid) {
+ if (adev->sdma.instance[i].aid_id ==
+ node_id_to_phys_map[entry->node_id])
+ break;
+ }
+
+ if (i >= adev->sdma.num_instances) {
+ dev_WARN_ONCE(
+ adev->dev, 1,
+ "Couldn't find the right sdma instance in trap handler");
+ return 0;
+ }
+
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[i].ring);
+ break;
+ case 1:
+ amdgpu_fence_process(&adev->sdma.instance[i].page);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+#if 0
+static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+
+ /* When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
+ goto out;
+
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
+ if (instance < 0)
+ goto out;
+
+ amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
+
+out:
+ return AMDGPU_RAS_SUCCESS;
+}
+#endif
+
+static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+
+ DRM_ERROR("Illegal instruction in SDMA command stream\n");
+
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
+ if (instance < 0)
+ return 0;
+
+ switch (entry->ring_id) {
+ case 0:
+ drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+ struct amdgpu_task_info *task_info;
+ u64 addr;
+
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
+ if (instance < 0 || instance >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance invalid %d\n", instance);
+ return -EINVAL;
+ }
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ dev_dbg_ratelimited(adev->dev,
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ return 0;
+}
+
+static int sdma_v4_4_2_process_vm_hole_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_doorbell_invalid_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+
+ dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_pool_timeout_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev,
+ "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev,
+ "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_ctxt_empty_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* There is nothing useful to be done here, only kept for debug */
+ dev_dbg_ratelimited(adev->dev, "SDMA context empty interrupt");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+ struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
+{
+ uint32_t data, def;
+ int i;
+
+ /* leave as default if it is not driver controlled */
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS))
+ return;
+
+ if (enable) {
+ for_each_inst(i, inst_mask) {
+ /* 1-not override: enable sdma mem light sleep */
+ def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
+ data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+ }
+ } else {
+ for_each_inst(i, inst_mask) {
+ /* 0-override:disable sdma mem light sleep */
+ def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
+ data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+ }
+ }
+}
+
+static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+ struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
+{
+ uint32_t data, def;
+ int i;
+
+ /* leave as default if it is not driver controlled */
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG))
+ return;
+
+ if (enable) {
+ for_each_inst(i, inst_mask) {
+ def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
+ data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
+ }
+ } else {
+ for_each_inst(i, inst_mask) {
+ def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
+ data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
+ }
+ }
+}
+
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t inst_mask;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+
+ sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, inst_mask);
+ sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+ adev, state == AMD_CG_STATE_GATE, inst_mask);
+ return 0;
+}
+
+static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v4_4_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_CLK_CTRL));
+ if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+ /* AMD_CG_SUPPORT_SDMA_LS */
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_POWER_CNTL));
+ if (data & SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+static void sdma_v4_4_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_4_2[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v4_4_2_get_reg_offset(adev, i,
+ sdma_reg_list_4_4_2[j].reg_offset));
+ }
+}
+
+const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
+ .name = "sdma_v4_4_2",
+ .early_init = sdma_v4_4_2_early_init,
+ .late_init = sdma_v4_4_2_late_init,
+ .sw_init = sdma_v4_4_2_sw_init,
+ .sw_fini = sdma_v4_4_2_sw_fini,
+ .hw_init = sdma_v4_4_2_hw_init,
+ .hw_fini = sdma_v4_4_2_hw_fini,
+ .suspend = sdma_v4_4_2_suspend,
+ .resume = sdma_v4_4_2_resume,
+ .is_idle = sdma_v4_4_2_is_idle,
+ .wait_for_idle = sdma_v4_4_2_wait_for_idle,
+ .soft_reset = sdma_v4_4_2_soft_reset,
+ .set_clockgating_state = sdma_v4_4_2_set_clockgating_state,
+ .set_powergating_state = sdma_v4_4_2_set_powergating_state,
+ .get_clockgating_state = sdma_v4_4_2_get_clockgating_state,
+ .dump_ip_state = sdma_v4_4_2_dump_ip_state,
+ .print_ip_state = sdma_v4_4_2_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xff,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .get_rptr = sdma_v4_4_2_ring_get_rptr,
+ .get_wptr = sdma_v4_4_2_ring_get_wptr,
+ .set_wptr = sdma_v4_4_2_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
+ /* sdma_v4_4_2_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
+ .emit_ib = sdma_v4_4_2_ring_emit_ib,
+ .emit_fence = sdma_v4_4_2_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_4_2_ring_test_ring,
+ .test_ib = sdma_v4_4_2_ring_test_ib,
+ .insert_nop = sdma_v4_4_2_ring_insert_nop,
+ .pad_ib = sdma_v4_4_2_ring_pad_ib,
+ .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = sdma_v4_4_2_reset_queue,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xff,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .get_rptr = sdma_v4_4_2_ring_get_rptr,
+ .get_wptr = sdma_v4_4_2_page_ring_get_wptr,
+ .set_wptr = sdma_v4_4_2_page_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
+ /* sdma_v4_4_2_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
+ .emit_ib = sdma_v4_4_2_ring_emit_ib,
+ .emit_fence = sdma_v4_4_2_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_4_2_ring_test_ring,
+ .test_ib = sdma_v4_4_2_ring_test_ib,
+ .insert_nop = sdma_v4_4_2_ring_insert_nop,
+ .pad_ib = sdma_v4_4_2_ring_pad_ib,
+ .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = sdma_v4_4_2_reset_queue,
+};
+
+static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, dev_inst;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v4_4_2_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ if (adev->sdma.has_page_queue) {
+ adev->sdma.instance[i].page.funcs =
+ &sdma_v4_4_2_page_ring_funcs;
+ adev->sdma.instance[i].page.me = i;
+ }
+
+ dev_inst = GET_INST(SDMA0, i);
+ /* AID to which SDMA belongs depends on physical instance */
+ adev->sdma.instance[i].aid_id =
+ dev_inst / adev->sdma.num_inst_per_aid;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_trap_irq_funcs = {
+ .set = sdma_v4_4_2_set_trap_irq_state,
+ .process = sdma_v4_4_2_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_illegal_inst_irq_funcs = {
+ .process = sdma_v4_4_2_process_illegal_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ecc_irq_funcs = {
+ .set = sdma_v4_4_2_set_ecc_irq_state,
+ .process = amdgpu_sdma_process_ecc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_vm_hole_irq_funcs = {
+ .process = sdma_v4_4_2_process_vm_hole_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_doorbell_invalid_irq_funcs = {
+ .process = sdma_v4_4_2_process_doorbell_invalid_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_pool_timeout_irq_funcs = {
+ .process = sdma_v4_4_2_process_pool_timeout_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = {
+ .process = sdma_v4_4_2_process_srbm_write_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ctxt_empty_irq_funcs = {
+ .process = sdma_v4_4_2_process_ctxt_empty_irq,
+};
+
+static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ctxt_empty_irq.num_types = adev->sdma.num_instances;
+
+ adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs;
+ adev->sdma.ecc_irq.funcs = &sdma_v4_4_2_ecc_irq_funcs;
+ adev->sdma.vm_hole_irq.funcs = &sdma_v4_4_2_vm_hole_irq_funcs;
+ adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs;
+ adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs;
+ adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs;
+ adev->sdma.ctxt_empty_irq.funcs = &sdma_v4_4_2_ctxt_empty_irq_funcs;
+}
+
+/**
+ * sdma_v4_4_2_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @copy_flags: copy flags for the buffers
+ *
+ * Copy GPU buffers using the DMA engine.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ uint32_t copy_flags)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v4_4_2_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine.
+ */
+static void sdma_v4_4_2_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v4_4_2_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v4_4_2_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v4_4_2_emit_fill_buffer,
+};
+
+static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v4_4_2_buffer_funcs;
+ if (adev->sdma.has_page_queue)
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
+ else
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v4_4_2_vm_copy_pte,
+
+ .write_pte = sdma_v4_4_2_vm_write_pte,
+ .set_pte_pde = sdma_v4_4_2_vm_set_pte_pde,
+};
+
+static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ struct drm_gpu_scheduler *sched;
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &sdma_v4_4_2_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.has_page_queue)
+ sched = &adev->sdma.instance[i].page.sched;
+ else
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_scheds[i] = sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+/**
+ * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA
+ * @adev: Pointer to the AMDGPU device structure
+ *
+ * This function update reset mask for SDMA and sets the supported
+ * reset types based on the IP version and firmware versions.
+ *
+ */
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
+{
+ /* per queue reset not supported for SRIOV */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * the user queue relies on MEC fw and pmfw when the sdma queue do reset.
+ * it needs to check both of them at here to skip old mec and pmfw.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ if ((adev->gfx.mec_fw_version >= 0xb0) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(9, 5, 0):
+ if ((adev->gfx.mec_fw_version >= 0xf) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+}
+
+const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 4,
+ .minor = 4,
+ .rev = 2,
+ .funcs = &sdma_v4_4_2_ip_funcs,
+};
+
+static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ if (!amdgpu_sriov_vf(adev))
+ sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
+
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
+
+ return r;
+}
+
+static int sdma_v4_4_2_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp_mask = inst_mask;
+ int i;
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, tmp_mask) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs = {
+ .suspend = &sdma_v4_4_2_xcp_suspend,
+ .resume = &sdma_v4_4_2_xcp_resume
+};
+
+static const struct amdgpu_ras_err_status_reg_entry sdma_v4_2_2_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(SDMA0, 0, regSDMA_UE_ERR_STATUS_LO, regSDMA_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SDMA"},
+};
+
+static const struct amdgpu_ras_memory_id_entry sdma_v4_4_2_ras_memory_list[] = {
+ {AMDGPU_SDMA_MBANK_DATA_BUF0, "SDMA_MBANK_DATA_BUF0"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF1, "SDMA_MBANK_DATA_BUF1"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF2, "SDMA_MBANK_DATA_BUF2"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF3, "SDMA_MBANK_DATA_BUF3"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF4, "SDMA_MBANK_DATA_BUF4"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF5, "SDMA_MBANK_DATA_BUF5"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF6, "SDMA_MBANK_DATA_BUF6"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF7, "SDMA_MBANK_DATA_BUF7"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF8, "SDMA_MBANK_DATA_BUF8"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF9, "SDMA_MBANK_DATA_BUF9"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF10, "SDMA_MBANK_DATA_BUF10"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF11, "SDMA_MBANK_DATA_BUF11"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF12, "SDMA_MBANK_DATA_BUF12"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF13, "SDMA_MBANK_DATA_BUF13"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF14, "SDMA_MBANK_DATA_BUF14"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF15, "SDMA_MBANK_DATA_BUF15"},
+ {AMDGPU_SDMA_UCODE_BUF, "SDMA_UCODE_BUF"},
+ {AMDGPU_SDMA_RB_CMD_BUF, "SDMA_RB_CMD_BUF"},
+ {AMDGPU_SDMA_IB_CMD_BUF, "SDMA_IB_CMD_BUF"},
+ {AMDGPU_SDMA_UTCL1_RD_FIFO, "SDMA_UTCL1_RD_FIFO"},
+ {AMDGPU_SDMA_UTCL1_RDBST_FIFO, "SDMA_UTCL1_RDBST_FIFO"},
+ {AMDGPU_SDMA_UTCL1_WR_FIFO, "SDMA_UTCL1_WR_FIFO"},
+ {AMDGPU_SDMA_DATA_LUT_FIFO, "SDMA_DATA_LUT_FIFO"},
+ {AMDGPU_SDMA_SPLIT_DAT_BUF, "SDMA_SPLIT_DAT_BUF"},
+};
+
+static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t sdma_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+ uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
+ unsigned long ue_count = 0;
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = adev->sdma.instance[sdma_inst].aid_id,
+ };
+
+ /* sdma v4_4_2 doesn't support query ce counts */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ sdma_v4_2_2_ue_reg_list,
+ ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
+ sdma_v4_4_2_ras_memory_list,
+ ARRAY_SIZE(sdma_v4_4_2_ras_memory_list),
+ sdma_dev_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+}
+
+static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t inst_mask;
+ int i = 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, inst_mask)
+ sdma_v4_4_2_inst_query_ras_error_count(adev, i, ras_err_status);
+ } else {
+ dev_warn(adev->dev, "SDMA RAS is not supported\n");
+ }
+}
+
+static void sdma_v4_4_2_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t sdma_inst)
+{
+ uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ sdma_v4_2_2_ue_reg_list,
+ ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
+ sdma_dev_inst);
+}
+
+static void sdma_v4_4_2_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ int i = 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, inst_mask)
+ sdma_v4_4_2_inst_reset_ras_error_count(adev, i);
+ } else {
+ dev_warn(adev->dev, "SDMA RAS is not supported\n");
+ }
+}
+
+static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
+ .query_ras_error_count = sdma_v4_4_2_query_ras_error_count,
+ .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
+};
+
+static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */
+static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 };
+
+static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ sdma_v4_4_2_err_codes,
+ ARRAY_SIZE(sdma_v4_4_2_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = {
+ .aca_bank_parser = sdma_v4_4_2_aca_bank_parser,
+ .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid,
+};
+
+static const struct aca_info sdma_v4_4_2_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &sdma_v4_4_2_aca_bank_ops,
+};
+
+static int sdma_v4_4_2_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_sdma_ras_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ return amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__SDMA,
+ &sdma_v4_4_2_aca_info, NULL);
+}
+
+static struct amdgpu_sdma_ras sdma_v4_4_2_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_4_2_ras_hw_ops,
+ .ras_late_init = sdma_v4_4_2_ras_late_init,
+ },
+};
+
+static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.ras = &sdma_v4_4_2_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
new file mode 100644
index 000000000000..d516145529bb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V4_4_2_H__
+#define __SDMA_V4_4_2_H__
+
+extern const struct amd_ip_funcs sdma_v4_4_2_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block;
+
+extern struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index d345e324837d..8ddc4df06a1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -51,15 +51,69 @@ MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
+
#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880
#define SDMA0_HYP_DEC_REG_END 0x5893
#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2)
+};
+
static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring);
static const struct soc15_reg_golden golden_settings_sdma_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
@@ -123,7 +177,42 @@ static const struct soc15_reg_golden golden_settings_sdma_nv14[] = {
static const struct soc15_reg_golden golden_settings_sdma_nv12[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_cyan_skillfish[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x007fffff, 0x004c5c00),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x007fffff, 0x004c5c00)
};
static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
@@ -146,8 +235,8 @@ static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_NAVI10:
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 0, 0):
soc15_program_register_sequence(adev,
golden_settings_sdma_5,
(const u32)ARRAY_SIZE(golden_settings_sdma_5));
@@ -155,7 +244,7 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_nv10,
(const u32)ARRAY_SIZE(golden_settings_sdma_nv10));
break;
- case CHIP_NAVI14:
+ case IP_VERSION(5, 0, 2):
soc15_program_register_sequence(adev,
golden_settings_sdma_5,
(const u32)ARRAY_SIZE(golden_settings_sdma_5));
@@ -163,7 +252,7 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_nv14,
(const u32)ARRAY_SIZE(golden_settings_sdma_nv14));
break;
- case CHIP_NAVI12:
+ case IP_VERSION(5, 0, 5):
if (amdgpu_sriov_vf(adev))
soc15_program_register_sequence(adev,
golden_settings_sdma_5_sriov,
@@ -176,6 +265,11 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_nv12,
(const u32)ARRAY_SIZE(golden_settings_sdma_nv12));
break;
+ case IP_VERSION(5, 0, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_cyan_skillfish,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_cyan_skillfish));
+ break;
default:
break;
}
@@ -195,100 +289,34 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
// navi10 real chip need to use PSP to load firmware
static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
- char fw_name[30];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct sdma_firmware_header_v1_0 *hdr;
-
- if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_NAVI12))
- return 0;
-
- DRM_DEBUG("\n");
-
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- chip_name = "navi10";
- break;
- case CHIP_NAVI14:
- chip_name = "navi14";
- break;
- case CHIP_NAVI12:
- chip_name = "navi12";
- break;
- default:
- BUG();
- }
+ int ret, i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
- else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
- if (err)
- goto out;
- hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
- adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
- adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
- if (adev->sdma.instance[i].feature_version >= 20)
- adev->sdma.instance[i].burst_nop = true;
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- }
-out:
- if (err) {
- DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ ret = amdgpu_sdma_init_microcode(adev, i, false);
+ if (ret)
+ return ret;
}
- return err;
+
+ return ret;
}
-static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v5_0_ring_get_rptr - get the current read pointer
*
@@ -301,7 +329,7 @@ static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
- rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+ rptr = (u64 *)ring->rptr_cpu_addr;
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@@ -321,12 +349,12 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
- wptr = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
+ wptr = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
wptr = wptr << 32;
- wptr |= RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
+ wptr |= RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
}
@@ -347,30 +375,32 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("Setting write pointer\n");
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
- "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
- ring->wptr_offs,
- lower_32_bits(ring->wptr << 2),
- upper_32_bits(ring->wptr << 2));
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
- adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
+ ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
DRM_DEBUG("Not using doorbell -- "
- "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
- "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- ring->me,
- lower_32_bits(ring->wptr << 2),
- ring->me,
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
- WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
}
}
@@ -405,18 +435,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
- /* Invalidate L2, because if we don't do it, we might get stale cache
- * lines from previous IBs.
- */
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
- SDMA_GCR_GL2_WB |
- SDMA_GCR_GLM_INV |
- SDMA_GCR_GLM_WB) << 16);
- amdgpu_ring_write(ring, 0xffffff80);
- amdgpu_ring_write(ring, 0xffff);
-
/* An IB packet must end on a 8 DW boundary--the next dword
* must be on a 8-dword boundary. Our IB packet below is 6
* dwords long, thus add x number of NOPs, such that, in
@@ -438,6 +456,30 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
}
/**
+ * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+ SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+/**
* sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
*
* @ring: amdgpu ring pointer
@@ -515,27 +557,21 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
* sdma_v5_0_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
+ * @inst_mask: mask of dma engine instances to be disabled
* Stop the gfx async dma ring buffers (NAVI10).
*/
-static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ for_each_inst(i, inst_mask) {
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
}
}
@@ -552,7 +588,7 @@ static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev)
}
/**
- * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
+ * sdma_v5_0_ctx_switch_enable - stop the async dma engines context switch
*
* @adev: amdgpu_device pointer
* @enable: enable/disable the DMA MEs context switch.
@@ -596,11 +632,11 @@ static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
}
if (enable && amdgpu_sdma_phase_quantum) {
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
phase_quantum);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
phase_quantum);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
phase_quantum);
}
if (!amdgpu_sriov_vf(adev))
@@ -621,9 +657,11 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
+ uint32_t inst_mask;
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!enable) {
- sdma_v5_0_gfx_stop(adev);
+ sdma_v5_0_gfx_stop(adev, 1 << inst_mask);
sdma_v5_0_rlc_stop(adev);
}
@@ -638,162 +676,184 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ * sdma_v5_0_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them (NAVI10).
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+static int sdma_v5_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
- u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
+ ring = &adev->sdma.instance[i].ring;
- if (!amdgpu_sriov_vf(adev))
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
-
- /* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
-
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
+ ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
+ ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
+ doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index, 20);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index, 20);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_0_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_0_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- if (!amdgpu_sriov_vf(adev)) {
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-
- /* enable MCBP */
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+ }
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- ring->sched.ready = true;
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_0_ctx_switch_enable(adev, true);
+ sdma_v5_0_enable(adev, true);
+ }
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_0_ctx_switch_enable(adev, true);
- sdma_v5_0_enable(adev, true);
- }
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_helper(ring);
+/**
+ * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_0_gfx_resume_instance(adev, i, false);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -897,6 +957,49 @@ static int sdma_v5_0_start(struct amdgpu_device *adev)
return r;
}
+static int sdma_v5_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
+
+ return 0;
+}
+
+static void sdma_v5_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_0_mqd_init;
+}
+
/**
* sdma_v5_0_ring_test_ring - simple async dma engine test
*
@@ -915,6 +1018,8 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
u32 tmp;
u64 gpu_addr;
+ tmp = 0xCAFEDEAD;
+
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
@@ -922,10 +1027,9 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
- tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ring_alloc(ring, 5);
+ r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
@@ -977,6 +1081,9 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
u32 tmp = 0;
u64 gpu_addr;
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
@@ -984,11 +1091,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
- tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- memset(&ib, 0, sizeof(ib));
+
r = amdgpu_ib_get(adev, NULL, 256,
- AMDGPU_IB_POOL_DIRECT, &ib);
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
@@ -1018,14 +1124,16 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
+
tmp = le32_to_cpu(adev->wb.wb[index]);
+
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1219,26 +1327,62 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_0_early_init(void *handle)
+static int sdma_v5_0_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset;
+ u32 tmp;
+
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= instance_id;
+
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
- adev->sdma.num_instances = 2;
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ return 0;
+}
+
+static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v5_0_stop_queue,
+ .start_kernel_queue = &sdma_v5_0_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine,
+};
+
+static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = sdma_v5_0_init_microcode(adev);
+ if (r)
+ return r;
sdma_v5_0_set_ring_funcs(adev);
sdma_v5_0_set_buffer_funcs(adev);
sdma_v5_0_set_vm_pte_funcs(adev);
sdma_v5_0_set_irq_funcs(adev);
+ sdma_v5_0_set_mqd_funcs(adev);
return 0;
}
-static int sdma_v5_0_sw_init(void *handle)
+static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+ uint32_t *ptr;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
@@ -1254,13 +1398,9 @@ static int sdma_v5_0_sw_init(void *handle)
if (r)
return r;
- r = sdma_v5_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1272,39 +1412,65 @@ static int sdma_v5_0_sw_init(void *handle)
(adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset
: (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "sdma%d", i);
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->sdma.trap_irq,
- (i == 0) ?
- AMDGPU_SDMA_IRQ_INSTANCE0 :
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
AMDGPU_SDMA_IRQ_INSTANCE1,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ if ((adev->sdma.instance[0].fw_version >= 35) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_0_sw_fini(void *handle)
+static int sdma_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
-
+ for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
- }
+
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
+
+ kfree(adev->sdma.ip_dump);
return 0;
}
-static int sdma_v5_0_hw_init(void *handle)
+static int sdma_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v5_0_init_golden_registers(adev);
@@ -1313,9 +1479,9 @@ static int sdma_v5_0_hw_init(void *handle)
return r;
}
-static int sdma_v5_0_hw_fini(void *handle)
+static int sdma_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1326,23 +1492,19 @@ static int sdma_v5_0_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_0_suspend(void *handle)
+static int sdma_v5_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_0_hw_fini(adev);
+ return sdma_v5_0_hw_fini(ip_block);
}
-static int sdma_v5_0_resume(void *handle)
+static int sdma_v5_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_0_hw_init(adev);
+ return sdma_v5_0_hw_init(ip_block);
}
-static bool sdma_v5_0_is_idle(void *handle)
+static bool sdma_v5_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1355,11 +1517,11 @@ static bool sdma_v5_0_is_idle(void *handle)
return true;
}
-static int sdma_v5_0_wait_for_idle(void *handle)
+static int sdma_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1372,13 +1534,104 @@ static int sdma_v5_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v5_0_soft_reset(void *handle)
+static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
}
+static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, ring->me, true);
+ amdgpu_amdkfd_resume(adev, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring)
+{
+ u32 f32_cntl, freeze, cntl, stat1_reg;
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ i = ring->me;
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* stop queue */
+ sdma_v5_0_gfx_stop(adev, 1 << i);
+
+ /* engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
+ /* check sdma copy engine all idle if frozen not received*/
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
+ cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
+err0:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+ u32 freeze;
+ int r;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ /* unfreeze*/
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
+
+ r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return r;
+}
+
static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1451,7 +1704,25 @@ static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
+ uint32_t mes_queue_id = entry->src_data[0];
+
DRM_DEBUG("IH: SDMA trap\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ return 0;
+ }
+
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
switch (entry->ring_id) {
@@ -1558,18 +1829,18 @@ static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_0_set_clockgating_state(void *handle,
+static int sdma_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
sdma_v5_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
sdma_v5_0_update_medium_grain_light_sleep(adev,
@@ -1582,15 +1853,15 @@ static int sdma_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_0_set_powergating_state(void *handle,
+static int sdma_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1607,10 +1878,51 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
+static void sdma_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v5_0_get_reg_offset(adev, i,
+ sdma_reg_list_5_0[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
.name = "sdma_v5_0",
.early_init = sdma_v5_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_0_sw_init,
.sw_fini = sdma_v5_0_sw_fini,
.hw_init = sdma_v5_0_hw_init,
@@ -1623,6 +1935,8 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
.set_clockgating_state = sdma_v5_0_set_clockgating_state,
.set_powergating_state = sdma_v5_0_set_powergating_state,
.get_clockgating_state = sdma_v5_0_get_clockgating_state,
+ .dump_ip_state = sdma_v5_0_dump_ip_state,
+ .print_ip_state = sdma_v5_0_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
@@ -1630,7 +1944,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = sdma_v5_0_ring_get_rptr,
.get_wptr = sdma_v5_0_ring_get_wptr,
.set_wptr = sdma_v5_0_ring_set_wptr,
@@ -1645,6 +1959,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
+ .emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
.emit_fence = sdma_v5_0_ring_emit_fence,
.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
@@ -1657,8 +1972,8 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
- .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
+ .reset = sdma_v5_0_reset_queue,
};
static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1695,7 +2010,7 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine (NAVI10).
* Used by the amdgpu ttm implementation to move pages if
@@ -1705,11 +2020,11 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
index d4e3c2e696f6..2ab71f21755a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
@@ -24,7 +24,6 @@
#ifndef __SDMA_V5_0_H__
#define __SDMA_V5_0_H__
-extern const struct amd_ip_funcs sdma_v5_0_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block;
#endif /* __SDMA_V5_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 690a5090475a..51101b0aa2fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -47,8 +47,12 @@
MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin");
MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin");
MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
#define SDMA1_REG_OFFSET 0x600
#define SDMA3_REG_OFFSET 0x400
@@ -56,10 +60,61 @@ MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
#define SDMA0_HYP_DEC_REG_END 0x5893
#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2)
+};
+
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring);
static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
@@ -85,154 +140,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
return base + internal_offset;
}
-static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev)
-{
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- break;
- default:
- break;
- }
-}
-
-static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
-{
- int err = 0;
- const struct sdma_firmware_header_v1_0 *hdr;
-
- err = amdgpu_ucode_validate(sdma_inst->fw);
- if (err)
- return err;
-
- hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
- sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
- sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
-
- if (sdma_inst->feature_version >= 20)
- sdma_inst->burst_nop = true;
-
- return 0;
-}
-
-static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev)
-{
- release_firmware(adev->sdma.instance[0].fw);
-
- memset((void *)adev->sdma.instance, 0,
- sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
-}
-
-/**
- * sdma_v5_2_init_microcode - load ucode images from disk
- *
- * @adev: amdgpu_device pointer
- *
- * Use the firmware interface to load the ucode images into
- * the driver (not loaded into hw).
- * Returns 0 on success, error on failure.
- */
-
-// emulation only, won't work on real chip
-// navi10 real chip need to use PSP to load firmware
-static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
-{
- const char *chip_name;
- char fw_name[40];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
-
- if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_SIENNA_CICHLID))
- return 0;
-
- DRM_DEBUG("\n");
-
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- chip_name = "sienna_cichlid";
- break;
- case CHIP_NAVY_FLOUNDER:
- chip_name = "navy_flounder";
- break;
- case CHIP_VANGOGH:
- chip_name = "vangogh";
- break;
- case CHIP_DIMGREY_CAVEFISH:
- chip_name = "dimgrey_cavefish";
- break;
- default:
- BUG();
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
-
- err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]);
- if (err)
- goto out;
-
- for (i = 1; i < adev->sdma.num_instances; i++)
- memcpy((void *)&adev->sdma.instance[i],
- (void *)&adev->sdma.instance[0],
- sizeof(struct amdgpu_sdma_instance));
-
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- }
-
-out:
- if (err) {
- DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name);
- sdma_v5_2_destroy_inst_ctx(adev);
- }
- return err;
-}
-
-static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v5_2_ring_get_rptr - get the current read pointer
*
@@ -245,7 +169,7 @@ static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
- rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+ rptr = (u64 *)ring->rptr_cpu_addr;
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@@ -265,7 +189,7 @@ static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
@@ -292,17 +216,27 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
- adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) {
+ /* SDMA seems to miss doorbells sometimes when powergating kicks in.
+ * Updating the wptr directly will wake it. This is only safe because
+ * we disallow gfxoff in begin_use() and then allow it again in end_use().
+ */
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
} else {
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
@@ -370,6 +304,31 @@ static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring,
}
/**
+ * sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v5_2_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB |
+ SDMA_GCR_GLM_INV | SDMA_GCR_GL1_INV |
+ SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+/**
* sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
*
* @ring: amdgpu ring pointer
@@ -382,17 +341,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
- amdgpu_ring_write(ring, ref_and_mask); /* reference */
- amdgpu_ring_write(ring, ref_and_mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ if (ring->me > 1) {
+ amdgpu_hdp_flush(adev, ring);
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ }
}
/**
@@ -432,7 +395,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
amdgpu_ring_write(ring, upper_32_bits(seq));
}
- if (flags & AMDGPU_FENCE_FLAG_INT) {
+ if ((flags & AMDGPU_FENCE_FLAG_INT)) {
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
@@ -444,37 +407,22 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
* sdma_v5_2_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
+ * @inst_mask: mask of dma engine instances to be disabled
* Stop the gfx async dma ring buffers.
*/
-static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
- struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring;
- struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1) ||
- (adev->mman.buffer_funcs_ring == sdma2) ||
- (adev->mman.buffer_funcs_ring == sdma3))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ for_each_inst(i, inst_mask) {
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
}
-
- sdma0->sched.ready = false;
- sdma1->sched.ready = false;
- sdma2->sched.ready = false;
- sdma3->sched.ready = false;
}
/**
@@ -490,7 +438,7 @@ static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
}
/**
- * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
+ * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch
*
* @adev: amdgpu_device pointer
* @enable: enable/disable the DMA MEs context switch.
@@ -527,18 +475,21 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
}
for (i = 0; i < adev->sdma.num_instances; i++) {
- f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
- f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
- AUTO_CTXSW_ENABLE, enable ? 1 : 0);
if (enable && amdgpu_sdma_phase_quantum) {
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
phase_quantum);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
phase_quantum);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
phase_quantum);
}
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ }
}
}
@@ -555,121 +506,136 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
+ uint32_t inst_mask;
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!enable) {
- sdma_v5_2_gfx_stop(adev);
+ sdma_v5_2_gfx_stop(adev, inst_mask);
sdma_v5_2_rlc_stop(adev);
}
- for (i = 0; i < adev->sdma.num_instances; i++) {
- f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ }
}
}
/**
- * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ * sdma_v5_2_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+
+static int sdma_v5_2_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
- u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
+ ring = &adev->sdma.instance[i].ring;
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
-
- /* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_2_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_2_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+
+ /* SRIOV VF has no control of any of registers below */
+ if (!amdgpu_sriov_vf(adev)) {
/* set utc l1 enable flag always to 1 */
temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
@@ -679,54 +645,62 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
/* Set up RESP_MODE to non-copy addresses */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
/* program default cache read and write policy */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
/* clean read policy and write policy bits */
temp &= 0xFF0FFF;
temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
(CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- ring->sched.ready = true;
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+ sdma_v5_2_enable(adev, true);
+ }
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_2_ctx_switch_enable(adev, true);
- sdma_v5_2_enable(adev, true);
- }
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
- return r;
- }
+/**
+ * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_2_gfx_resume_instance(adev, i, false);
+ if (r)
+ return r;
}
return 0;
@@ -789,37 +763,49 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
return 0;
}
-static int sdma_v5_2_soft_reset(void *handle)
+static int sdma_v5_2_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 grbm_soft_reset;
u32 tmp;
- int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- grbm_soft_reset = REG_SET_FIELD(0,
- GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
- 1);
- grbm_soft_reset <<= i;
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= instance_id;
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ return 0;
+}
+static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma_v5_2_soft_reset_engine(adev, i);
udelay(50);
}
return 0;
}
+static const struct amdgpu_sdma_funcs sdma_v5_2_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v5_2_stop_queue,
+ .start_kernel_queue = &sdma_v5_2_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v5_2_soft_reset_engine,
+};
+
/**
* sdma_v5_2_start - setup and start the async dma engines
*
@@ -831,6 +817,7 @@ static int sdma_v5_2_soft_reset(void *handle)
static int sdma_v5_2_start(struct amdgpu_device *adev)
{
int r = 0;
+ struct amdgpu_ip_block *ip_block;
if (amdgpu_sriov_vf(adev)) {
sdma_v5_2_ctx_switch_enable(adev, false);
@@ -851,7 +838,11 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
msleep(1000);
}
- sdma_v5_2_soft_reset(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA);
+ if (!ip_block)
+ return -EINVAL;
+
+ sdma_v5_2_soft_reset(ip_block);
/* unhalt the MEs */
sdma_v5_2_enable(adev, true);
/* enable sdma ring preemption */
@@ -866,6 +857,49 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
return r;
}
+static int sdma_v5_2_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
+
+ return 0;
+}
+
+static void sdma_v5_2_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_2_mqd_init;
+}
+
/**
* sdma_v5_2_ring_test_ring - simple async dma engine test
*
@@ -884,6 +918,8 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
u32 tmp;
u64 gpu_addr;
+ tmp = 0xCAFEDEAD;
+
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
@@ -891,10 +927,9 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
- tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ring_alloc(ring, 5);
+ r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
@@ -946,6 +981,9 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
u32 tmp = 0;
u64 gpu_addr;
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
@@ -953,9 +991,8 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
- tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- memset(&ib, 0, sizeof(ib));
+
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
@@ -986,14 +1023,16 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
+
tmp = le32_to_cpu(adev->wb.wb[index]);
+
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1152,7 +1191,28 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+
+ /* Update the PD address for this VMID. */
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Trigger invalidation. */
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f));
+ amdgpu_ring_write(ring, req);
+ amdgpu_ring_write(ring, 0xFFFFFFFF);
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F));
}
static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1188,29 +1248,20 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_2_early_init(void *handle)
+static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- adev->sdma.num_instances = 4;
- break;
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- adev->sdma.num_instances = 2;
- break;
- case CHIP_VANGOGH:
- adev->sdma.num_instances = 1;
- break;
- default:
- break;
- }
+ r = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (r)
+ return r;
sdma_v5_2_set_ring_funcs(adev);
sdma_v5_2_set_buffer_funcs(adev);
sdma_v5_2_set_vm_pte_funcs(adev);
sdma_v5_2_set_irq_funcs(adev);
+ sdma_v5_2_set_mqd_funcs(adev);
return 0;
}
@@ -1249,11 +1300,13 @@ static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
return -EINVAL;
}
-static int sdma_v5_2_sw_init(void *handle)
+static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+ uint32_t *ptr;
/* SDMA trap event */
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1264,13 +1317,9 @@ static int sdma_v5_2_sw_init(void *handle)
return r;
}
- r = sdma_v5_2_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1282,46 +1331,77 @@ static int sdma_v5_2_sw_init(void *handle)
ring->doorbell_index =
(adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "sdma%d", i);
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->sdma.trap_irq,
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 4):
+ if ((adev->sdma.instance[0].fw_version >= 76) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(5, 2, 5):
+ if ((adev->sdma.instance[0].fw_version >= 34) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_2_sw_fini(void *handle)
+static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
- sdma_v5_2_destroy_inst_ctx(adev);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+ kfree(adev->sdma.ip_dump);
return 0;
}
-static int sdma_v5_2_hw_init(void *handle)
+static int sdma_v5_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- sdma_v5_2_init_golden_registers(adev);
+ struct amdgpu_device *adev = ip_block->adev;
- r = sdma_v5_2_start(adev);
-
- return r;
+ return sdma_v5_2_start(adev);
}
-static int sdma_v5_2_hw_fini(void *handle)
+static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1332,23 +1412,19 @@ static int sdma_v5_2_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_2_suspend(void *handle)
+static int sdma_v5_2_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_fini(adev);
+ return sdma_v5_2_hw_fini(ip_block);
}
-static int sdma_v5_2_resume(void *handle)
+static int sdma_v5_2_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_init(adev);
+ return sdma_v5_2_hw_init(ip_block);
}
-static bool sdma_v5_2_is_idle(void *handle)
+static bool sdma_v5_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1361,11 +1437,11 @@ static bool sdma_v5_2_is_idle(void *handle)
return true;
}
-static int sdma_v5_2_wait_for_idle(void *handle)
+static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1, sdma2, sdma3;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1380,6 +1456,100 @@ static int sdma_v5_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
+static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, ring->me, true);
+ amdgpu_amdkfd_resume(adev, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring)
+{
+ u32 f32_cntl, freeze, cntl, stat1_reg;
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ i = ring->me;
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* stop queue */
+ sdma_v5_2_gfx_stop(adev, 1 << i);
+
+ /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
+
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
+ cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
+
+err0:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+ u32 freeze;
+ int r;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ /* unfreeze and unhalt */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
+
+ r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return r;
+}
+
static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1431,13 +1601,14 @@ static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
u32 sdma_cntl;
-
u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL);
- sdma_cntl = RREG32(reg_offset);
- sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
- WREG32(reg_offset, sdma_cntl);
+ if (!amdgpu_sriov_vf(adev)) {
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+ }
return 0;
}
@@ -1446,7 +1617,25 @@ static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
+ uint32_t mes_queue_id = entry->src_data[0];
+
DRM_DEBUG("IH: SDMA trap\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ return 0;
+ }
+
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
switch (entry->ring_id) {
@@ -1523,6 +1712,30 @@ static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
+static bool sdma_v5_2_firmware_mgcg_support(struct amdgpu_device *adev,
+ int i)
+{
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 2, 1):
+ if (adev->sdma.instance[i].fw_version < 70)
+ return false;
+ break;
+ case IP_VERSION(5, 2, 3):
+ if (adev->sdma.instance[i].fw_version < 47)
+ return false;
+ break;
+ case IP_VERSION(5, 2, 7):
+ if (adev->sdma.instance[i].fw_version < 9)
+ return false;
+ break;
+ default:
+ return true;
+ }
+
+ return true;
+
+}
+
static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
@@ -1530,6 +1743,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
+
+ if (!sdma_v5_2_firmware_mgcg_support(adev, i))
+ adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;
+
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
/* Enable sdma clock gating */
def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
@@ -1563,6 +1780,11 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.instance[i].fw_version < 70 &&
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(5, 2, 1))
+ adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;
+
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
/* Enable sdma mem light sleep */
def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
@@ -1581,23 +1803,27 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_2_set_clockgating_state(void *handle,
+static int sdma_v5_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 1):
+ case IP_VERSION(5, 2, 4):
+ case IP_VERSION(5, 2, 5):
+ case IP_VERSION(5, 2, 6):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 7):
sdma_v5_2_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
sdma_v5_2_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -1606,30 +1832,106 @@ static int sdma_v5_2_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_2_set_powergating_state(void *handle,
+static int sdma_v5_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v5_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
*flags = 0;
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
+ if (!(data & SDMA0_CLK_CTRL__CGCG_EN_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
/* AMD_CG_SUPPORT_SDMA_LS */
data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Disallow GFXOFF while SDMA is active.
+ * We can probably just limit this to 5.2.3,
+ * but it shouldn't hurt for other parts since
+ * this GFXOFF will be disallowed anyway when SDMA is
+ * active, this just makes it explicit.
+ * sdma_v5_2_ring_set_wptr() takes advantage of this
+ * to update the wptr because sometimes SDMA seems to miss
+ * doorbells when entering PG. If you remove this, update
+ * sdma_v5_2_ring_set_wptr() as well!
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Allow GFXOFF when SDMA is complete.
+ */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void sdma_v5_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_2[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v5_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v5_2_get_reg_offset(adev, i,
+ sdma_reg_list_5_2[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_2_sw_init,
.sw_fini = sdma_v5_2_sw_fini,
.hw_init = sdma_v5_2_hw_init,
@@ -1642,6 +1944,8 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.set_clockgating_state = sdma_v5_2_set_clockgating_state,
.set_powergating_state = sdma_v5_2_set_powergating_state,
.get_clockgating_state = sdma_v5_2_get_clockgating_state,
+ .dump_ip_state = sdma_v5_2_dump_ip_state,
+ .print_ip_state = sdma_v5_2_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
@@ -1649,7 +1953,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = sdma_v5_2_ring_get_rptr,
.get_wptr = sdma_v5_2_ring_get_wptr,
.set_wptr = sdma_v5_2_ring_set_wptr,
@@ -1664,6 +1968,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */
.emit_ib = sdma_v5_2_ring_emit_ib,
+ .emit_mem_sync = sdma_v5_2_ring_emit_mem_sync,
.emit_fence = sdma_v5_2_ring_emit_fence,
.emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v5_2_ring_emit_vm_flush,
@@ -1672,12 +1977,14 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.test_ib = sdma_v5_2_ring_test_ib,
.insert_nop = sdma_v5_2_ring_insert_nop,
.pad_ib = sdma_v5_2_ring_pad_ib,
+ .begin_use = sdma_v5_2_ring_begin_use,
+ .end_use = sdma_v5_2_ring_end_use,
.emit_wreg = sdma_v5_2_ring_emit_wreg,
.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
- .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
.preempt_ib = sdma_v5_2_ring_preempt_ib,
+ .reset = sdma_v5_2_reset_queue,
};
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
@@ -1714,7 +2021,7 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine.
* Used by the amdgpu ttm implementation to move pages if
@@ -1724,11 +2031,11 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
index b70414fef2a1..863145b3a77e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
@@ -24,7 +24,6 @@
#ifndef __SDMA_V5_2_H__
#define __SDMA_V5_2_H__
-extern const struct amd_ip_funcs sdma_v5_2_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block;
#endif /* __SDMA_V5_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
new file mode 100644
index 000000000000..217040044987
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -0,0 +1,1917 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "hdp/hdp_6_0_0_offset.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "sdma_v6_0_0_pkt_open.h"
+#include "nbio_v4_3.h"
+#include "sdma_common.h"
+#include "sdma_v6_0.h"
+#include "v11_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_3.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x589a
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS),
+};
+
+static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v6_0_start(struct amdgpu_device *adev);
+
+static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+ u32 base;
+
+ if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
+ internal_offset <= SDMA0_HYP_DEC_REG_END) {
+ base = adev->reg_offset[GC_HWIP][0][1];
+ if (instance != 0)
+ internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
+ } else {
+ base = adev->reg_offset[GC_HWIP][0][0];
+ if (instance == 1)
+ internal_offset += SDMA1_REG_OFFSET;
+ }
+
+ return base + internal_offset;
+}
+
+static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 1);
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+/**
+ * sdma_v6_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware.
+ */
+static uint64_t sdma_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = (u64 *)ring->rptr_cpu_addr;
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v6_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware.
+ */
+static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr = 0;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v6_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware.
+ */
+static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+ ring->me, regSDMA0_QUEUE0_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+ ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/*
+ * sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @ib: IB object to schedule
+ * @flags: unused
+ * @job: job to retrieve vmid from
+ *
+ * Schedule an IB in the DMA ring.
+ */
+static void sdma_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+ /* An IB packet must end on a 8 DW boundary--the next dword
+ * must be on a 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below, is a solution of x.
+ */
+ sdma_v6_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v6_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v6_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+ SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+
+/**
+ * sdma_v6_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v6_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: fence seq number
+ * @flags: fence flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed.
+ */
+static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+ }
+}
+
+/**
+ * sdma_v6_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers.
+ */
+static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ }
+}
+
+/**
+ * sdma_v6_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues.
+ */
+static void sdma_v6_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v6_0_ctxempty_int_enable - enable or disable context empty interrupts
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable context switching due to queue empty conditions
+ *
+ * Enable or disable the async dma engines queue empty context switch.
+ */
+static void sdma_v6_0_ctxempty_int_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ CTXEMPTY_INT_ENABLE, enable ? 1 : 0);
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL), f32_cntl);
+ }
+ }
+}
+
+/**
+ * sdma_v6_0_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines.
+ */
+static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v6_0_gfx_stop(adev);
+ sdma_v6_0_rlc_stop(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), f32_cntl);
+ }
+}
+
+/**
+ * sdma_v6_0_gfx_resume_instance - start/restart a certain sdma engine
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
+ *
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
+ */
+static int sdma_v6_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u64 wptr_gpu_addr;
+
+ ring = &adev->sdma.instance[i].ring;
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v6_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v6_0_enable(adev, true);
+
+ return amdgpu_ring_test_helper(ring);
+}
+
+/**
+ * sdma_v6_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v6_0_gfx_resume_instance(adev, i, false);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v6_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/**
+ * sdma_v6_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+ bool use_broadcast;
+
+ /* halt the MEs */
+ sdma_v6_0_enable(adev, false);
+
+ if (!adev->sdma.instance[0].fw)
+ return -EINVAL;
+
+ /* use broadcast mode to load SDMA microcode by default */
+ use_broadcast = true;
+
+ if (use_broadcast) {
+ dev_info(adev->dev, "Use broadcast method to load SDMA firmware\n");
+ /* load Control Thread microcode */
+ hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->ctx_jt_offset + hdr->ctx_jt_size) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ /* load Context Switch microcode */
+ fw_size = le32_to_cpu(hdr->ctl_jt_offset + hdr->ctl_jt_size) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->ctl_ucode_offset));
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0x8000);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+ } else {
+ dev_info(adev->dev, "Use legacy method to load SDMA firmware\n");
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* load Control Thread microcode */
+ hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->ctx_jt_offset + hdr->ctx_jt_size) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
+
+ /* load Context Switch microcode */
+ fw_size = le32_to_cpu(hdr->ctl_jt_offset + hdr->ctl_jt_size) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->ctl_ucode_offset));
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0x8000);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
+ }
+ }
+
+ return 0;
+}
+
+static int sdma_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp;
+ int i;
+
+ sdma_v6_0_gfx_stop(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
+ tmp |= SDMA0_FREEZE__FREEZE_MASK;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
+ tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
+
+ udelay(100);
+
+ tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+ }
+
+ return sdma_v6_0_start(adev);
+}
+
+static bool sdma_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r;
+ long tmo = msecs_to_jiffies(1000);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * sdma_v6_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_start(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v6_0_enable(adev, false);
+
+ /* set RB registers */
+ r = sdma_v6_0_gfx_resume(adev);
+ return r;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = sdma_v6_0_load_microcode(adev);
+ if (r)
+ return r;
+
+ /* The value of regSDMA_F32_CNTL is invalid the moment after loading fw */
+ if (amdgpu_emu_mode == 1)
+ msleep(1000);
+ }
+
+ /* unhalt the MEs */
+ sdma_v6_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v6_0_ctxempty_int_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v6_0_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v6_0_rlc_resume(adev);
+
+ return r;
+}
+
+static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v11_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, 0,
+ regSDMA0_QUEUE0_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+
+ m->sdmax_rlcx_skip_cntl = 0;
+ m->sdmax_rlcx_context_status = 0;
+ m->sdmax_rlcx_doorbell_log = 0;
+
+ m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
+ m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
+
+ m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+ m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+ m->sdmax_rlcx_f32_dbg0 = lower_32_bits(prop->fence_address);
+ m->sdmax_rlcx_f32_dbg1 = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static void sdma_v6_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v11_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v6_0_mqd_init;
+}
+
+/**
+ * sdma_v6_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ tmp = 0xCAFEDEAD;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/*
+ * sdma_v6_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
+ }
+
+ ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v6_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA.
+ */
+static void sdma_v6_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v6_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA.
+ */
+static void sdma_v6_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v6_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA.
+ */
+static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/*
+ * sdma_v6_0_ring_pad_ib - pad the IB
+ * @ib: indirect buffer to fill with padding
+ * @ring: amdgpu ring pointer
+ *
+ * Pad the IB with NOPs to a boundary multiple of 8.
+ */
+static void sdma_v6_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 0x7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
+}
+
+/**
+ * sdma_v6_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/*
+ * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA.
+ */
+static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+
+ /* Update the PD address for this VMID. */
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Trigger invalidation. */
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f));
+ amdgpu_ring_write(ring, req);
+ amdgpu_ring_write(ring, 0xFFFFFFFF);
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F));
+}
+
+static void sdma_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v6_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void sdma_v6_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+static struct amdgpu_sdma_ras sdma_v6_0_3_ras = {
+ .ras_block = {
+ .ras_late_init = amdgpu_ras_block_late_init,
+ },
+};
+
+static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(6, 0, 3):
+ adev->sdma.ras = &sdma_v6_0_3_ras;
+ break;
+ default:
+ break;
+ }
+}
+
+static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = true;
+ break;
+ case 1:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = false;
+ break;
+ case 2:
+ adev->sdma.no_user_submission = true;
+ adev->sdma.disable_uq = false;
+ break;
+ }
+
+ r = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (r)
+ return r;
+
+ sdma_v6_0_set_ring_funcs(adev);
+ sdma_v6_0_set_buffer_funcs(adev);
+ sdma_v6_0_set_vm_pte_funcs(adev);
+ sdma_v6_0_set_irq_funcs(adev);
+ sdma_v6_0_set_mqd_funcs(adev);
+ sdma_v6_0_set_ras_funcs(adev);
+
+ return 0;
+}
+
+static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+ uint32_t *ptr;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA user fence event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__SDMA_FENCE,
+ &adev->sdma.fence_irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->me = i;
+ ring->no_user_submission = adev->sdma.no_user_submission;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
+
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ if ((adev->sdma.instance[0].fw_version >= 21) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
+ return -EINVAL;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 1):
+ if ((adev->sdma.instance[0].fw_version >= 18) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 2):
+ if ((adev->sdma.instance[0].fw_version >= 23) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 3):
+ if (adev->sdma.instance[0].fw_version >= 29 && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 1, 0):
+ if ((adev->sdma.instance[0].fw_version >= 14) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 1, 1):
+ if ((adev->sdma.instance[0].fw_version >= 17) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 1, 2):
+ if ((adev->sdma.instance[0].fw_version >= 15) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 1, 3):
+ if ((adev->sdma.instance[0].fw_version >= 10) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+ kfree(adev->sdma.ip_dump);
+
+ return 0;
+}
+
+static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int i, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->sdma.trap_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->sdma.trap_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = sdma_v6_0_start(adev);
+ if (r)
+ return r;
+
+ return sdma_v6_0_set_userq_trap_interrupts(adev, true);
+}
+
+static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ sdma_v6_0_ctxempty_int_enable(adev, false);
+ sdma_v6_0_enable(adev, false);
+ sdma_v6_0_set_userq_trap_interrupts(adev, false);
+
+ return 0;
+}
+
+static int sdma_v6_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v6_0_hw_fini(ip_block);
+}
+
+static int sdma_v6_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v6_0_hw_init(ip_block);
+}
+
+static bool sdma_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
+
+ if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ u32 sdma0, sdma1;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ sdma0 = RREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
+ sdma1 = RREG32(sdma_v6_0_get_reg_offset(adev, 1, regSDMA0_STATUS_REG));
+
+ if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ sdma_gfx_preempt =
+ sdma_v6_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 10);
+ sdma_v6_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ if (r)
+ return r;
+
+ r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ u32 reg_offset = sdma_v6_0_get_reg_offset(adev, type, regSDMA0_CNTL);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+ }
+
+ return 0;
+}
+
+static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int instances, queue;
+
+ DRM_DEBUG("IH: SDMA trap\n");
+
+ queue = entry->ring_id & 0xf;
+ instances = (entry->ring_id & 0xf0) >> 4;
+ if (instances > 1) {
+ DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
+ return -EINVAL;
+ }
+
+ switch (entry->client_id) {
+ case SOC21_IH_CLIENTID_GFX:
+ switch (queue) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[instances].ring);
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ }
+
+ return 0;
+}
+
+static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+}
+
+static void sdma_v6_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_6_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v6_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v6_0_get_reg_offset(adev, i,
+ sdma_reg_list_6_0[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
+ .name = "sdma_v6_0",
+ .early_init = sdma_v6_0_early_init,
+ .sw_init = sdma_v6_0_sw_init,
+ .sw_fini = sdma_v6_0_sw_fini,
+ .hw_init = sdma_v6_0_hw_init,
+ .hw_fini = sdma_v6_0_hw_fini,
+ .suspend = sdma_v6_0_suspend,
+ .resume = sdma_v6_0_resume,
+ .is_idle = sdma_v6_0_is_idle,
+ .wait_for_idle = sdma_v6_0_wait_for_idle,
+ .soft_reset = sdma_v6_0_soft_reset,
+ .check_soft_reset = sdma_v6_0_check_soft_reset,
+ .set_clockgating_state = sdma_v6_0_set_clockgating_state,
+ .set_powergating_state = sdma_v6_0_set_powergating_state,
+ .get_clockgating_state = sdma_v6_0_get_clockgating_state,
+ .dump_ip_state = sdma_v6_0_dump_ip_state,
+ .print_ip_state = sdma_v6_0_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v6_0_ring_get_rptr,
+ .get_wptr = sdma_v6_0_ring_get_wptr,
+ .set_wptr = sdma_v6_0_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v6_0_ring_init_cond_exec */
+ 6 + /* sdma_v6_0_ring_emit_hdp_flush */
+ 6 + /* sdma_v6_0_ring_emit_pipeline_sync */
+ /* sdma_v6_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v6_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v6_0_ring_emit_ib */
+ .emit_ib = sdma_v6_0_ring_emit_ib,
+ .emit_mem_sync = sdma_v6_0_ring_emit_mem_sync,
+ .emit_fence = sdma_v6_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v6_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v6_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v6_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v6_0_ring_test_ring,
+ .test_ib = sdma_v6_0_ring_test_ib,
+ .insert_nop = sdma_v6_0_ring_insert_nop,
+ .pad_ib = sdma_v6_0_ring_pad_ib,
+ .emit_wreg = sdma_v6_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v6_0_ring_init_cond_exec,
+ .preempt_ib = sdma_v6_0_ring_preempt_ib,
+ .reset = sdma_v6_0_reset_queue,
+};
+
+static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v6_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = {
+ .set = sdma_v6_0_set_trap_irq_state,
+ .process = sdma_v6_0_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v6_0_fence_irq_funcs = {
+ .process = sdma_v6_0_process_fence_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = {
+ .process = sdma_v6_0_process_illegal_inst_irq,
+};
+
+static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+ adev->sdma.num_instances;
+ adev->sdma.trap_irq.funcs = &sdma_v6_0_trap_irq_funcs;
+ adev->sdma.fence_irq.funcs = &sdma_v6_0_fence_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v6_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill with commands
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @copy_flags: copy flags for the buffers
+ *
+ * Copy GPU buffers using the DMA engine.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ uint32_t copy_flags)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v6_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine.
+ */
+static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v6_0_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v6_0_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v6_0_emit_fill_buffer,
+};
+
+static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v6_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v6_0_vm_copy_pte,
+ .write_pte = sdma_v6_0_vm_write_pte,
+ .set_pte_pde = sdma_v6_0_vm_set_pte_pde,
+};
+
+static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &sdma_v6_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version sdma_v6_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h
new file mode 100644
index 000000000000..e473ec7dfc8f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V6_0_H__
+#define __SDMA_V6_0_H__
+
+extern const struct amd_ip_funcs sdma_v6_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v6_0_ip_block;
+
+#endif /* __SDMA_V6_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
new file mode 100644
index 000000000000..d8cf830916b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
@@ -0,0 +1,5672 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SDMA_V6_0_0_PKT_OPEN_H_
+#define __SDMA_V6_0_0_PKT_OPEN_H_
+
+#define SDMA_OP_NOP 0
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_OP_GPUVM_INV 16
+#define SDMA_OP_GCR_REQ 17
+#define SDMA_OP_DUMMY_TRAP 32
+#define SDMA_SUBOP_TIMESTAMP_SET 0
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_COPY_DIRTY_PAGE 7
+#define SDMA_SUBOP_COPY_LINEAR_PHY 8
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_LARGE 36
+#define SDMA_SUBOP_COPY_LINEAR_BC 16
+#define SDMA_SUBOP_COPY_TILED_BC 17
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22
+#define SDMA_SUBOP_WRITE_LINEAR 0
+#define SDMA_SUBOP_WRITE_TILED 1
+#define SDMA_SUBOP_WRITE_TILED_BC 17
+#define SDMA_SUBOP_PTEPDE_GEN 0
+#define SDMA_SUBOP_PTEPDE_COPY 1
+#define SDMA_SUBOP_PTEPDE_RMW 2
+#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3
+#define SDMA_SUBOP_MEM_INCR 1
+#define SDMA_SUBOP_DATA_FILL_MULTI 1
+#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
+#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
+#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define SDMA_SUBOP_VM_INVALIDATION 4
+#define HEADER_AGENT_DISPATCH 4
+#define HEADER_BARRIER 5
+#define SDMA_OP_AQL_COPY 0
+#define SDMA_OP_AQL_BARRIER_OR 0
+
+#define SDMA_GCR_RANGE_IS_PA (1 << 18)
+#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16)
+#define SDMA_GCR_GL2_WB (1 << 15)
+#define SDMA_GCR_GL2_INV (1 << 14)
+#define SDMA_GCR_GL2_DISCARD (1 << 13)
+#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11)
+#define SDMA_GCR_GL2_US (1 << 10)
+#define SDMA_GCR_GL1_INV (1 << 9)
+#define SDMA_GCR_GLV_INV (1 << 8)
+#define SDMA_GCR_GLK_INV (1 << 7)
+#define SDMA_GCR_GLK_WB (1 << 6)
+#define SDMA_GCR_GLM_INV (1 << 5)
+#define SDMA_GCR_GLM_WB (1 << 4)
+#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2)
+#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0)
+
+#define SDMA_DCC_DATA_FORMAT(x) ((x) & 0x3f)
+#define SDMA_DCC_NUM_TYPE(x) (((x) & 0x7) << 9)
+#define SDMA_DCC_READ_CM(x) (((x) & 0x3) << 16)
+#define SDMA_DCC_WRITE_CM(x) (((x) & 0x3) << 18)
+#define SDMA_DCC_MAX_COM(x) (((x) & 0x3) << 24)
+#define SDMA_DCC_MAX_UCOM(x) (((x) & 0x1) << 26)
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_HEADER_cpv_shift)
+
+/*define for backwards field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25
+#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 19
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 27
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift)
+
+/*define for all field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift)
+
+/*define for dst_llc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift)
+
+/*define for src_llc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift 16
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 17
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift)
+
+/*define for addr_pair_num field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_ADDR_PAIR_NUM(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift)
+
+/*define for dst_llc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift)
+
+/*define for src_llc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift 16
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 17
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift)
+
+/*define for dst_log field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/*define for dst2_cache_policy field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift 10
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift)
+
+/*define for dst1_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/*define for dst1_cache_policy field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST1_ADDR_LO word*/
+/*define for dst1_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/*define for DST1_ADDR_HI word*/
+/*define for dst1_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/*define for DST2_ADDR_LO word*/
+/*define for dst2_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/*define for DST2_ADDR_HI word*/
+/*define for dst2_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift)
+
+/*define for DW_4 word*/
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift)
+
+/*define for DW_5 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift)
+
+/*define for DW_6 word*/
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift)
+
+/*define for DW_7 word*/
+/*define for src_slice_pitch_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_SRC_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift)
+
+/*define for DW_8 word*/
+/*define for src_slice_pitch_47_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask 0x0000FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_SRC_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_11 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift)
+
+/*define for DW_12 word*/
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift)
+
+/*define for DW_13 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_offset 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift)
+
+/*define for DW_14 word*/
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_offset 14
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift)
+
+/*define for DW_15 word*/
+/*define for dst_slice_pitch_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_offset 15
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_DST_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift)
+
+/*define for DW_16 word*/
+/*define for dst_slice_pitch_47_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask 0x0000FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift)
+
+/*define for dst_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift)
+
+/*define for src_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift)
+
+/*define for DW_17 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_offset 17
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift)
+
+/*define for DW_18 word*/
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_offset 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift)
+
+/*define for DW_19 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_offset 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 27
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_HEADER_cpv_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for tile_cache_policy field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift)
+
+/*define for videocopy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word*/
+/*define for tiled_addr0_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word*/
+/*define for tiled_addr0_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word*/
+/*define for tiled_addr1_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word*/
+/*define for tiled_addr1_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift)
+
+/*define for DW_6 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift)
+
+/*define for DW_7 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift)
+
+/*define for DW_8 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for dst2_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for tile_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_T2T_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_cpv_mask) << SDMA_PKT_COPY_T2T_HEADER_cpv_shift)
+
+/*define for dcc_dir field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31
+#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift)
+
+/*define for src_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift)
+
+/*define for src_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift)
+
+/*define for src_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift)
+
+/*define for dst_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift)
+
+/*define for dst_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift)
+
+/*define for dst_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift)
+
+/*define for dst_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_T2T_DW_14_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift)
+
+/*define for meta_llc field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift 14
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift)
+
+/*define for pipe_aligned field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift 31
+#define SDMA_PKT_COPY_T2T_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift)
+
+/*define for src_array_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift)
+
+/*define for src_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift)
+
+/*define for src_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift)
+
+/*define for src_bank_w field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift)
+
+/*define for src_bank_h field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift)
+
+/*define for src_num_bank field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift)
+
+/*define for src_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift)
+
+/*define for src_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift)
+
+/*define for dst_array_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift)
+
+/*define for dst_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift)
+
+/*define for dst_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift)
+
+/*define for dst_bank_w field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift)
+
+/*define for dst_bank_h field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift)
+
+/*define for dst_num_bank field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift)
+
+/*define for dst_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift)
+
+/*define for dst_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift)
+
+/*define for mip_id field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+/*define for tile_cache_policy field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift)
+
+/*define for meta_llc field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift)
+
+/*define for pipe_aligned field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_STRUCT_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask) << SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/*define for SB_ADDR_LO word*/
+/*define for sb_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/*define for SB_ADDR_HI word*/
+/*define for sb_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/*define for START_INDEX word*/
+/*define for start_index field*/
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/*define for DW_5 word*/
+/*define for stride field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift)
+
+/*define for struct_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/*define for struct_cache_policy field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift 26
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_UNTILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift 26
+#define SDMA_PKT_WRITE_UNTILED_DW_3_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_TILED_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift 26
+#define SDMA_PKT_WRITE_TILED_DW_7_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask) << SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift 18
+#define SDMA_PKT_PTEPDE_COPY_HEADER_TMZ(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_shift 28
+#define SDMA_PKT_PTEPDE_COPY_HEADER_CPV(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_cpv_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_cpv_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift 22
+#define SDMA_PKT_PTEPDE_COPY_COUNT_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift 29
+#define SDMA_PKT_PTEPDE_COPY_COUNT_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift)
+
+/*define for pte_size field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift)
+
+/*define for direction field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_BIT_FOR_DW word*/
+/*define for mask_first_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift)
+
+/*define for mask_last_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift)
+
+/*define for COUNT_IN_32B_XFER word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_RMW packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16
+#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24
+#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift 26
+#define SDMA_PKT_PTEPDE_RMW_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift 28
+#define SDMA_PKT_PTEPDE_RMW_HEADER_CPV(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift)
+
+/*define for MASK_LO word*/
+/*define for mask_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift)
+
+/*define for MASK_HI word*/
+/*define for mask_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift)
+
+/*define for VALUE_LO word*/
+/*define for value_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift)
+
+/*define for VALUE_HI word*/
+/*define for value_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift)
+
+/*define for COUNT word*/
+/*define for num_of_pte field*/
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_offset 7
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift 0
+#define SDMA_PKT_PTEPDE_RMW_COUNT_NUM_OF_PTE(x) (((x) & SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask) << SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift)
+
+
+/*
+** Definitions for SDMA_PKT_REGISTER_RMW packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_offset 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_shift 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_REGISTER_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_offset 1
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_mask 0x000FFFFF
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_shift 0
+#define SDMA_PKT_REGISTER_RMW_ADDR_ADDR(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_addr_mask) << SDMA_PKT_REGISTER_RMW_ADDR_addr_shift)
+
+/*define for aperture_id field*/
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_offset 1
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask 0x00000FFF
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift 20
+#define SDMA_PKT_REGISTER_RMW_ADDR_APERTURE_ID(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask) << SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_offset 2
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_shift 0
+#define SDMA_PKT_REGISTER_RMW_MASK_MASK(x) (((x) & SDMA_PKT_REGISTER_RMW_MASK_mask_mask) << SDMA_PKT_REGISTER_RMW_MASK_mask_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_offset 3
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_shift 0
+#define SDMA_PKT_REGISTER_RMW_VALUE_VALUE(x) (((x) & SDMA_PKT_REGISTER_RMW_VALUE_value_mask) << SDMA_PKT_REGISTER_RMW_VALUE_value_shift)
+
+/*define for MISC word*/
+/*define for stride field*/
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_offset 4
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_mask 0x000FFFFF
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_shift 0
+#define SDMA_PKT_REGISTER_RMW_MISC_STRIDE(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_stride_mask) << SDMA_PKT_REGISTER_RMW_MISC_stride_shift)
+
+/*define for num_of_reg field*/
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_offset 4
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask 0x00000FFF
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift 20
+#define SDMA_PKT_REGISTER_RMW_MISC_NUM_OF_REG(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask) << SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift 24
+#define SDMA_PKT_WRITE_INCR_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask) << SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cpv_mask) << SDMA_PKT_WRITE_INCR_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/*define for INIT_DW0 word*/
+/*define for init_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/*define for INIT_DW1 word*/
+/*define for init_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/*define for INCR_DW0 word*/
+/*define for incr_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/*define for INCR_DW1 word*/
+/*define for incr_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_INDIRECT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/*define for priv field*/
+#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001
+#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31
+#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift)
+
+/*define for BASE_LO word*/
+/*define for ib_base_31_0 field*/
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/*define for BASE_HI word*/
+/*define for ib_base_63_32 field*/
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/*define for IB_SIZE word*/
+/*define for ib_size field*/
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/*define for CSA_ADDR_LO word*/
+/*define for csa_addr_31_0 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/*define for CSA_ADDR_HI word*/
+/*define for csa_addr_63_32 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SEMAPHORE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/*define for write_one field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/*define for signal field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/*define for mailbox field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_MEM_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_MEM_INCR_HEADER_op_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_MEM_INCR_HEADER_op_shift 0
+#define SDMA_PKT_MEM_INCR_HEADER_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_op_mask) << SDMA_PKT_MEM_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_MEM_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_sub_op_mask) << SDMA_PKT_MEM_INCR_HEADER_sub_op_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift 24
+#define SDMA_PKT_MEM_INCR_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift 26
+#define SDMA_PKT_MEM_INCR_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_shift 28
+#define SDMA_PKT_MEM_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_cpv_mask) << SDMA_PKT_MEM_INCR_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_MEM_INCR_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask) << SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_MEM_INCR_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask) << SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_VM_INVALIDATION packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
+#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
+
+/*define for gfx_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16
+#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift)
+
+/*define for mm_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24
+#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift)
+
+/*define for INVALIDATEREQ word*/
+/*define for invalidatereq field*/
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
+
+/*define for ADDRESSRANGELO word*/
+/*define for addressrangelo field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
+
+/*define for ADDRESSRANGEHI word*/
+/*define for invalidateack field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
+
+/*define for addressrangehi field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
+
+/*define for reserved field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_FENCE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_FENCE_HEADER_mtype_offset 0
+#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_FENCE_HEADER_mtype_shift 16
+#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_FENCE_HEADER_gcc_offset 0
+#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gcc_shift 19
+#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_FENCE_HEADER_sys_offset 0
+#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_sys_shift 20
+#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_FENCE_HEADER_snp_offset 0
+#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_snp_shift 22
+#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_FENCE_HEADER_gpa_offset 0
+#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gpa_shift 23
+#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24
+#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_FENCE_HEADER_llc_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_llc_policy_shift 26
+#define SDMA_PKT_FENCE_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_llc_policy_mask) << SDMA_PKT_FENCE_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_FENCE_HEADER_cpv_offset 0
+#define SDMA_PKT_FENCE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_cpv_shift 28
+#define SDMA_PKT_FENCE_HEADER_CPV(x) (((x) & SDMA_PKT_FENCE_HEADER_cpv_mask) << SDMA_PKT_FENCE_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SRBM_WRITE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/*define for byte_en field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/*define for apertureid field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20
+#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PRE_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/*define for dev_sel field*/
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COND_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_shift 24
+#define SDMA_PKT_COND_EXE_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cache_policy_mask) << SDMA_PKT_COND_EXE_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COND_EXE_HEADER_cpv_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COND_EXE_HEADER_cpv_shift 28
+#define SDMA_PKT_COND_EXE_HEADER_CPV(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cpv_mask) << SDMA_PKT_COND_EXE_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/*define for REFERENCE word*/
+/*define for reference field*/
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_CONSTANT_FILL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift 24
+#define SDMA_PKT_CONSTANT_FILL_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift 28
+#define SDMA_PKT_CONSTANT_FILL_HEADER_CPV(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift)
+
+/*define for fillsize field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DATA_FILL_MULTI packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift 24
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift 28
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CPV(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift)
+
+/*define for memlog_clr field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift)
+
+/*define for BYTE_STRIDE word*/
+/*define for byte_stride field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift)
+
+/*define for DMA_COUNT word*/
+/*define for dma_count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for BYTE_COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REGMEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift 20
+#define SDMA_PKT_POLL_REGMEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift 24
+#define SDMA_PKT_POLL_REGMEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift)
+
+/*define for hdp_flush field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/*define for func field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/*define for mem_poll field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/*define for DW5 word*/
+/*define for interval field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/*define for retry_count field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift)
+
+/*define for SRC_ADDR word*/
+/*define for addr_31_2 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for ea field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+/*define for START_PAGE word*/
+/*define for addr_31_4 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift)
+
+/*define for PAGE_NUM word*/
+/*define for page_num_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift)
+
+/*define for mode field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift)
+
+/*define for PATTERN word*/
+/*define for pattern field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift)
+
+/*define for CMP0_ADDR_START_LO word*/
+/*define for cmp0_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift)
+
+/*define for CMP0_ADDR_START_HI word*/
+/*define for cmp0_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift)
+
+/*define for CMP0_ADDR_END_LO word*/
+/*define for cmp0_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_offset 4
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP0_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift)
+
+/*define for CMP0_ADDR_END_HI word*/
+/*define for cmp0_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_offset 5
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP0_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift)
+
+/*define for CMP1_ADDR_START_LO word*/
+/*define for cmp1_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift)
+
+/*define for CMP1_ADDR_START_HI word*/
+/*define for cmp1_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift)
+
+/*define for CMP1_ADDR_END_LO word*/
+/*define for cmp1_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/*define for CMP1_ADDR_END_HI word*/
+/*define for cmp1_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/*define for REC_ADDR_LO word*/
+/*define for rec_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift)
+
+/*define for REC_ADDR_HI word*/
+/*define for rec_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift)
+
+/*define for RESERVED word*/
+/*define for reserved field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_ATOMIC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_ATOMIC_HEADER_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_ATOMIC_HEADER_op_shift 0
+#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift)
+
+/*define for loop field*/
+#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16
+#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18
+#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_shift 20
+#define SDMA_PKT_ATOMIC_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cache_policy_mask) << SDMA_PKT_ATOMIC_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_ATOMIC_HEADER_cpv_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_cpv_shift 24
+#define SDMA_PKT_ATOMIC_HEADER_CPV(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cpv_mask) << SDMA_PKT_ATOMIC_HEADER_cpv_shift)
+
+/*define for atomic_op field*/
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25
+#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift)
+
+/*define for SRC_DATA_LO word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift)
+
+/*define for SRC_DATA_HI word*/
+/*define for src_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift)
+
+/*define for CMP_DATA_LO word*/
+/*define for cmp_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift)
+
+/*define for CMP_DATA_HI word*/
+/*define for cmp_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift)
+
+/*define for LOOP_INTERVAL word*/
+/*define for loop_interval field*/
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_SET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/*define for INIT_DATA_LO word*/
+/*define for init_data_31_0 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/*define for INIT_DATA_HI word*/
+/*define for init_data_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift 24
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift 26
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift 28
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift 24
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift 26
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift 28
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DUMMY_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GPUVM_INV packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0
+#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8
+#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for per_vmid_inv_req field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift)
+
+/*define for flush_type field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift)
+
+/*define for l2_ptes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift)
+
+/*define for l2_pde0 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift)
+
+/*define for l2_pde1 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift)
+
+/*define for l2_pde2 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift)
+
+/*define for l1_ptes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift)
+
+/*define for clr_protection_fault_status_addr field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift)
+
+/*define for log_request field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift)
+
+/*define for four_kilobytes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for s field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift)
+
+/*define for page_va_42_12 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for page_va_47_43 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GCR_REQ packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0
+#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8
+#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for base_va_31_7 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for base_va_47_32 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift)
+
+/*define for gcr_control_15_0 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift 16
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for gcr_control_18_16 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask 0x00000007
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift)
+
+/*define for limit_va_31_7 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift)
+
+/*define for PAYLOAD4 word*/
+/*define for limit_va_47_32 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask 0x0000000F
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift 24
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_NOP_DATA0_data0_offset 1
+#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_NOP_DATA0_data0_shift 0
+#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_HEADER packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0
+#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16
+#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift)
+
+/*define for cpv field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_HEADER_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_cpv_mask) << SDMA_AQL_PKT_HEADER_HEADER_cpv_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift)
+
+/*define for cpv field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for RETURN_ADDR_LO word*/
+/*define for return_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift)
+
+/*define for RETURN_ADDR_HI word*/
+/*define for return_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for RESERVED_DW10 word*/
+/*define for reserved_dw10 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift)
+
+/*define for RESERVED_DW11 word*/
+/*define for reserved_dw11 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_BARRIER_OR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift)
+
+/*define for cpv field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for DEPENDENT_ADDR_0_LO word*/
+/*define for dependent_addr_0_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift)
+
+/*define for DEPENDENT_ADDR_0_HI word*/
+/*define for dependent_addr_0_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift)
+
+/*define for DEPENDENT_ADDR_1_LO word*/
+/*define for dependent_addr_1_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift)
+
+/*define for DEPENDENT_ADDR_1_HI word*/
+/*define for dependent_addr_1_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift)
+
+/*define for DEPENDENT_ADDR_2_LO word*/
+/*define for dependent_addr_2_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift)
+
+/*define for DEPENDENT_ADDR_2_HI word*/
+/*define for dependent_addr_2_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift)
+
+/*define for DEPENDENT_ADDR_3_LO word*/
+/*define for dependent_addr_3_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift)
+
+/*define for DEPENDENT_ADDR_3_HI word*/
+/*define for dependent_addr_3_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift)
+
+/*define for DEPENDENT_ADDR_4_LO word*/
+/*define for dependent_addr_4_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift)
+
+/*define for DEPENDENT_ADDR_4_HI word*/
+/*define for dependent_addr_4_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift)
+
+/*define for CACHE_POLICY word*/
+/*define for cache_policy0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift)
+
+/*define for cache_policy1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift 5
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift)
+
+/*define for cache_policy2 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift 10
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY2(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift)
+
+/*define for cache_policy3 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift 15
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY3(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift)
+
+/*define for cache_policy4 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY4(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+#endif /* __SDMA_V6_0_0_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
new file mode 100644
index 000000000000..2b81344dcd66
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -0,0 +1,1859 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "hdp/hdp_6_0_0_offset.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "sdma_v6_0_0_pkt_open.h"
+#include "nbio_v4_3.h"
+#include "sdma_common.h"
+#include "sdma_v7_0.h"
+#include "v12_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x589a
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+/*define for compression field for sdma7*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_mask 0x00000001
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_COMPRESS(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_compress_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_compress_shift)
+
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_REV),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS),
+};
+
+static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v7_0_start(struct amdgpu_device *adev);
+
+static u32 sdma_v7_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+ u32 base;
+
+ if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
+ internal_offset <= SDMA0_HYP_DEC_REG_END) {
+ base = adev->reg_offset[GC_HWIP][0][1];
+ if (instance != 0)
+ internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
+ } else {
+ base = adev->reg_offset[GC_HWIP][0][0];
+ if (instance == 1)
+ internal_offset += SDMA1_REG_OFFSET;
+ }
+
+ return base + internal_offset;
+}
+
+static unsigned sdma_v7_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 1);
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+/**
+ * sdma_v7_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware.
+ */
+static uint64_t sdma_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = (u64 *)ring->rptr_cpu_addr;
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v7_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware.
+ */
+static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr = 0;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v7_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware.
+ */
+static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
+ ring->me,
+ regSDMA0_QUEUE0_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
+ ring->me,
+ regSDMA0_QUEUE0_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+static void sdma_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v7_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring.
+ */
+static void sdma_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+ /* An IB packet must end on a 8 DW boundary--the next dword
+ * must be on a 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below, is a solution of x.
+ */
+ sdma_v7_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v7_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v7_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+ SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+
+/**
+ * sdma_v7_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v7_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: fence seq number
+ * @flags: fence flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed.
+ */
+static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+ }
+}
+
+/**
+ * sdma_v7_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers.
+ */
+static void sdma_v7_0_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ }
+}
+
+/**
+ * sdma_v7_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues.
+ */
+static void sdma_v7_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v7_0_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch.
+ */
+static void sdma_v7_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+}
+
+/**
+ * sdma_v7_0_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines.
+ */
+static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 mcu_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v7_0_gfx_stop(adev);
+ sdma_v7_0_rlc_stop(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+ mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_MCU_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), mcu_cntl);
+ }
+}
+
+/**
+ * sdma_v7_0_gfx_resume_instance - start/restart a certain sdma engine
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
+ *
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
+ */
+static int sdma_v7_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u64 wptr_gpu_addr;
+ int r;
+
+ ring = &adev->sdma.instance[i].ring;
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev))
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
+ else
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v7_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ ring->sched.ready = true;
+
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v7_0_ctx_switch_enable(adev, true);
+ sdma_v7_0_enable(adev, true);
+ }
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ ring->sched.ready = false;
+
+ return r;
+}
+
+/**
+ * sdma_v7_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v7_0_gfx_resume_instance(adev, i, false);
+ if (r)
+ return r;
+ }
+
+ return 0;
+
+}
+
+/**
+ * sdma_v7_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static void sdma_v12_0_free_ucode_buffer(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj,
+ &adev->sdma.instance[i].sdma_fw_gpu_addr,
+ (void **)&adev->sdma.instance[i].sdma_fw_ptr);
+ }
+}
+
+/**
+ * sdma_v7_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v7_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v3_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ uint32_t tmp, sdma_status, ic_op_cntl;
+ int i, r, j;
+
+ /* halt the MEs */
+ sdma_v7_0_enable(adev, false);
+
+ if (!adev->sdma.instance[0].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->ucode_size_bytes);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->sdma.instance[i].sdma_fw_obj,
+ &adev->sdma.instance[i].sdma_fw_gpu_addr,
+ (void **)&adev->sdma.instance[i].sdma_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
+ amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);
+
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL));
+ tmp = REG_SET_FIELD(tmp, SDMA0_IC_CNTL, GPA, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_LO),
+ lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_HI),
+ upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
+
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL));
+ tmp = REG_SET_FIELD(tmp, SDMA0_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL), tmp);
+
+ /* Wait for sdma ucode init complete */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ ic_op_cntl = RREG32_SOC15_IP(GC,
+ sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL));
+ sdma_status = RREG32_SOC15_IP(GC,
+ sdma_v7_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
+ if ((REG_GET_FIELD(ic_op_cntl, SDMA0_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
+ (REG_GET_FIELD(sdma_status, SDMA0_STATUS_REG, UCODE_INIT_DONE) == 1))
+ break;
+ udelay(1);
+ }
+
+ if (j >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to init sdma ucode\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int sdma_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp;
+ int i;
+
+ sdma_v7_0_gfx_stop(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ //tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
+ //tmp |= SDMA0_FREEZE__FREEZE_MASK;
+ //WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+ tmp |= SDMA0_MCU_CNTL__HALT_MASK;
+ tmp |= SDMA0_MCU_CNTL__RESET_MASK;
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
+
+ udelay(100);
+
+ tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+ }
+
+ return sdma_v7_0_start(adev);
+}
+
+static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r;
+ long tmo = msecs_to_jiffies(1000);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ return false;
+}
+
+static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ if (r)
+ return r;
+
+ r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/**
+ * sdma_v7_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_start(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v7_0_ctx_switch_enable(adev, false);
+ sdma_v7_0_enable(adev, false);
+
+ /* set RB registers */
+ r = sdma_v7_0_gfx_resume(adev);
+ return r;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = sdma_v7_0_load_microcode(adev);
+ if (r) {
+ sdma_v12_0_free_ucode_buffer(adev);
+ return r;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ msleep(1000);
+ }
+
+ /* unhalt the MEs */
+ sdma_v7_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v7_0_ctx_switch_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v7_0_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v7_0_rlc_resume(adev);
+
+ return r;
+}
+
+static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v12_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, 0,
+ regSDMA0_QUEUE0_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+
+ m->sdmax_rlcx_doorbell_log = 0;
+ m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
+ m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
+
+ m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+ m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+ m->sdmax_rlcx_mcu_dbg0 = lower_32_bits(prop->fence_address);
+ m->sdmax_rlcx_mcu_dbg1 = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static void sdma_v7_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_0_mqd_init;
+}
+
+/**
+ * sdma_v7_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ tmp = 0xCAFEDEAD;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/**
+ * sdma_v7_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
+ }
+
+ ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v7_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA.
+ */
+static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
+
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = 0;
+
+}
+
+/**
+ * sdma_v7_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA.
+ */
+static void sdma_v7_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v7_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA.
+ */
+static void sdma_v7_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v7_0_ring_pad_ib - pad the IB
+ *
+ * @ring: amdgpu ring pointer
+ * @ib: indirect buffer to fill with padding
+ *
+ * Pad the IB with NOPs to a boundary multiple of 8.
+ */
+static void sdma_v7_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 0x7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
+}
+
+/**
+ * sdma_v7_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v7_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA.
+ */
+static void sdma_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ /* SRBM WRITE command will not support on sdma v7.
+ * Use Register WRITE command instead, which OPCODE is same as SRBM WRITE
+ */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = true;
+ break;
+ case 1:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = false;
+ break;
+ case 2:
+ adev->sdma.no_user_submission = true;
+ adev->sdma.disable_uq = false;
+ break;
+ }
+
+ r = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (r) {
+ DRM_ERROR("Failed to init sdma firmware!\n");
+ return r;
+ }
+
+ sdma_v7_0_set_ring_funcs(adev);
+ sdma_v7_0_set_buffer_funcs(adev);
+ sdma_v7_0_set_vm_pte_funcs(adev);
+ sdma_v7_0_set_irq_funcs(adev);
+ sdma_v7_0_set_mqd_funcs(adev);
+
+ return 0;
+}
+
+static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
+ uint32_t *ptr;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_12_0_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA user fence event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_12_0_0__SRCID__SDMA_FENCE,
+ &adev->sdma.fence_irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->me = i;
+ ring->no_user_submission = adev->sdma.no_user_submission;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
+
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ if ((adev->sdma.instance[0].fw_version >= 7966358) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
+ sdma_v12_0_free_ucode_buffer(adev);
+
+ kfree(adev->sdma.ip_dump);
+
+ return 0;
+}
+
+static int sdma_v7_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int i, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->sdma.trap_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->sdma.trap_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = sdma_v7_0_start(adev);
+ if (r)
+ return r;
+
+ return sdma_v7_0_set_userq_trap_interrupts(adev, true);
+}
+
+static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ sdma_v7_0_ctx_switch_enable(adev, false);
+ sdma_v7_0_enable(adev, false);
+ sdma_v7_0_set_userq_trap_interrupts(adev, false);
+
+ return 0;
+}
+
+static int sdma_v7_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v7_0_hw_fini(ip_block);
+}
+
+static int sdma_v7_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v7_0_hw_init(ip_block);
+}
+
+static bool sdma_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32(sdma_v7_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
+
+ if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ u32 sdma0, sdma1;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ sdma0 = RREG32(sdma_v7_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
+ sdma1 = RREG32(sdma_v7_0_get_reg_offset(adev, 1, regSDMA0_STATUS_REG));
+
+ if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v7_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ sdma_gfx_preempt =
+ sdma_v7_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
+ return r;
+ }
+ sdma_v7_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static int sdma_v7_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ u32 reg_offset = sdma_v7_0_get_reg_offset(adev, type, regSDMA0_CNTL);
+
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+
+ return 0;
+}
+
+static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int instances, queue;
+
+ DRM_DEBUG("IH: SDMA trap\n");
+
+ queue = entry->ring_id & 0xf;
+ instances = (entry->ring_id & 0xf0) >> 4;
+ if (instances > 1) {
+ DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
+ return -EINVAL;
+ }
+
+ switch (entry->client_id) {
+ case SOC21_IH_CLIENTID_GFX:
+ switch (queue) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[instances].ring);
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ }
+
+ return 0;
+}
+
+static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+static int sdma_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int sdma_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+}
+
+static void sdma_v7_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v7_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v7_0_get_reg_offset(adev, i,
+ sdma_reg_list_7_0[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+const struct amd_ip_funcs sdma_v7_0_ip_funcs = {
+ .name = "sdma_v7_0",
+ .early_init = sdma_v7_0_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v7_0_sw_init,
+ .sw_fini = sdma_v7_0_sw_fini,
+ .hw_init = sdma_v7_0_hw_init,
+ .hw_fini = sdma_v7_0_hw_fini,
+ .suspend = sdma_v7_0_suspend,
+ .resume = sdma_v7_0_resume,
+ .is_idle = sdma_v7_0_is_idle,
+ .wait_for_idle = sdma_v7_0_wait_for_idle,
+ .soft_reset = sdma_v7_0_soft_reset,
+ .check_soft_reset = sdma_v7_0_check_soft_reset,
+ .set_clockgating_state = sdma_v7_0_set_clockgating_state,
+ .set_powergating_state = sdma_v7_0_set_powergating_state,
+ .get_clockgating_state = sdma_v7_0_get_clockgating_state,
+ .dump_ip_state = sdma_v7_0_dump_ip_state,
+ .print_ip_state = sdma_v7_0_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v7_0_ring_get_rptr,
+ .get_wptr = sdma_v7_0_ring_get_wptr,
+ .set_wptr = sdma_v7_0_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v7_0_ring_init_cond_exec */
+ 6 + /* sdma_v7_0_ring_emit_hdp_flush */
+ 6 + /* sdma_v7_0_ring_emit_pipeline_sync */
+ /* sdma_v7_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v7_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v7_0_ring_emit_ib */
+ .emit_ib = sdma_v7_0_ring_emit_ib,
+ .emit_mem_sync = sdma_v7_0_ring_emit_mem_sync,
+ .emit_fence = sdma_v7_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v7_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v7_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v7_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v7_0_ring_test_ring,
+ .test_ib = sdma_v7_0_ring_test_ib,
+ .insert_nop = sdma_v7_0_ring_insert_nop,
+ .pad_ib = sdma_v7_0_ring_pad_ib,
+ .emit_wreg = sdma_v7_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v7_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v7_0_ring_init_cond_exec,
+ .preempt_ib = sdma_v7_0_ring_preempt_ib,
+ .reset = sdma_v7_0_reset_queue,
+};
+
+static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v7_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v7_0_trap_irq_funcs = {
+ .set = sdma_v7_0_set_trap_irq_state,
+ .process = sdma_v7_0_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v7_0_fence_irq_funcs = {
+ .process = sdma_v7_0_process_fence_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v7_0_illegal_inst_irq_funcs = {
+ .process = sdma_v7_0_process_illegal_inst_irq,
+};
+
+static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+ adev->sdma.num_instances;
+ adev->sdma.trap_irq.funcs = &sdma_v7_0_trap_irq_funcs;
+ adev->sdma.fence_irq.funcs = &sdma_v7_0_fence_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v7_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v7_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill with commands
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @copy_flags: copy flags for the buffers
+ *
+ * Copy GPU buffers using the DMA engine.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ uint32_t copy_flags)
+{
+ uint32_t num_type, data_format, max_com, write_cm;
+
+ max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED);
+ data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT);
+ num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE);
+ write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) |
+ SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
+
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+
+ if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)))
+ ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) |
+ ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) |
+ ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) |
+ SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1);
+ else
+ ib->ptr[ib->length_dw++] = 0;
+}
+
+/**
+ * sdma_v7_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine.
+ */
+static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL) |
+ SDMA_PKT_CONSTANT_FILL_HEADER_COMPRESS(1);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 8,
+ .emit_copy_buffer = sdma_v7_0_emit_copy_buffer,
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v7_0_emit_fill_buffer,
+};
+
+static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v7_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 8,
+ .copy_pte = sdma_v7_0_vm_copy_pte,
+ .write_pte = sdma_v7_0_vm_write_pte,
+ .set_pte_pde = sdma_v7_0_vm_set_pte_pde,
+};
+
+static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &sdma_v7_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version sdma_v7_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 7,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h
new file mode 100644
index 000000000000..5af863bb39c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V7_0_H__
+#define __SDMA_V7_0_H__
+
+extern const struct amd_ip_funcs sdma_v7_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v7_0_ip_block;
+
+#endif /* __SDMA_V7_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 6b5cf7882a12..f7288372ee61 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -26,6 +26,8 @@
#include <linux/module.h>
#include <linux/pci.h>
+#include <drm/amdgpu_drm.h>
+
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@@ -33,6 +35,7 @@
#include "amdgpu_vce.h"
#include "atom.h"
#include "amd_pcie.h"
+
#include "si_dpm.h"
#include "sid.h"
#include "si_ih.h"
@@ -42,17 +45,31 @@
#include "dce_v6_0.h"
#include "si.h"
#include "uvd_v3_1.h"
-#include "dce_virtual.h"
+#include "vce_v1_0.h"
+
+#include "uvd/uvd_4_0_d.h"
+
+#include "smu/smu_6_0_d.h"
+#include "smu/smu_6_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
+#include "gca/gfx_6_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gmc/gmc_6_0_d.h"
+#include"gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
-#include "uvd/uvd_4_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+#include "si_enums.h"
#include "amdgpu_dm.h"
+#include "amdgpu_vkms.h"
static const u32 tahiti_golden_registers[] =
{
@@ -905,6 +922,106 @@ static const u32 hainan_mgcg_cgcg_init[] =
0x3630, 0xfffffff0, 0x00000100,
};
+/* tahiti, pitcairn, verde */
+static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 0,
+ },
+};
+
+static const struct amdgpu_video_codecs tahiti_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(tahiti_video_codecs_encode_array),
+ .codec_array = tahiti_video_codecs_encode_array,
+};
+
+/* oland and hainan don't support encode */
+static const struct amdgpu_video_codecs hainan_video_codecs_encode =
+{
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+/* tahiti, pitcairn, verde, oland */
+static const struct amdgpu_video_codec_info tahiti_video_codecs_decode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 3,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 5,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 41,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 4,
+ },
+};
+
+static const struct amdgpu_video_codecs tahiti_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(tahiti_video_codecs_decode_array),
+ .codec_array = tahiti_video_codecs_decode_array,
+};
+
+/* hainan doesn't support decode */
+static const struct amdgpu_video_codecs hainan_video_codecs_decode =
+{
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static int si_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ switch (adev->asic_type) {
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ if (encode)
+ *codecs = &tahiti_video_codecs_encode;
+ else
+ *codecs = &tahiti_video_codecs_decode;
+ return 0;
+ case CHIP_OLAND:
+ if (encode)
+ *codecs = &hainan_video_codecs_encode;
+ else
+ *codecs = &tahiti_video_codecs_decode;
+ return 0;
+ case CHIP_HAINAN:
+ if (encode)
+ *codecs = &hainan_video_codecs_encode;
+ else
+ *codecs = &hainan_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static u32 si_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags;
@@ -961,8 +1078,8 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg)
u32 r;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(SMC_IND_INDEX_0, (reg));
- r = RREG32(SMC_IND_DATA_0);
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ r = RREG32(mmSMC_IND_DATA_0);
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return r;
}
@@ -972,8 +1089,8 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
unsigned long flags;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(SMC_IND_INDEX_0, (reg));
- WREG32(SMC_IND_DATA_0, (v));
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ WREG32(mmSMC_IND_DATA_0, (v));
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
}
@@ -1000,55 +1117,55 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
}
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
- {GRBM_STATUS},
+ {mmGRBM_STATUS},
{mmGRBM_STATUS2},
{mmGRBM_STATUS_SE0},
{mmGRBM_STATUS_SE1},
{mmSRBM_STATUS},
{mmSRBM_STATUS2},
- {DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
- {DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
+ {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET},
+ {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET},
{mmCP_STAT},
{mmCP_STALLED_STAT1},
{mmCP_STALLED_STAT2},
{mmCP_STALLED_STAT3},
- {GB_ADDR_CONFIG},
- {MC_ARB_RAMCFG},
- {GB_TILE_MODE0},
- {GB_TILE_MODE1},
- {GB_TILE_MODE2},
- {GB_TILE_MODE3},
- {GB_TILE_MODE4},
- {GB_TILE_MODE5},
- {GB_TILE_MODE6},
- {GB_TILE_MODE7},
- {GB_TILE_MODE8},
- {GB_TILE_MODE9},
- {GB_TILE_MODE10},
- {GB_TILE_MODE11},
- {GB_TILE_MODE12},
- {GB_TILE_MODE13},
- {GB_TILE_MODE14},
- {GB_TILE_MODE15},
- {GB_TILE_MODE16},
- {GB_TILE_MODE17},
- {GB_TILE_MODE18},
- {GB_TILE_MODE19},
- {GB_TILE_MODE20},
- {GB_TILE_MODE21},
- {GB_TILE_MODE22},
- {GB_TILE_MODE23},
- {GB_TILE_MODE24},
- {GB_TILE_MODE25},
- {GB_TILE_MODE26},
- {GB_TILE_MODE27},
- {GB_TILE_MODE28},
- {GB_TILE_MODE29},
- {GB_TILE_MODE30},
- {GB_TILE_MODE31},
- {CC_RB_BACKEND_DISABLE, true},
- {GC_USER_RB_BACKEND_DISABLE, true},
- {PA_SC_RASTER_CONFIG, true},
+ {mmGB_ADDR_CONFIG},
+ {mmMC_ARB_RAMCFG},
+ {mmGB_TILE_MODE0},
+ {mmGB_TILE_MODE1},
+ {mmGB_TILE_MODE2},
+ {mmGB_TILE_MODE3},
+ {mmGB_TILE_MODE4},
+ {mmGB_TILE_MODE5},
+ {mmGB_TILE_MODE6},
+ {mmGB_TILE_MODE7},
+ {mmGB_TILE_MODE8},
+ {mmGB_TILE_MODE9},
+ {mmGB_TILE_MODE10},
+ {mmGB_TILE_MODE11},
+ {mmGB_TILE_MODE12},
+ {mmGB_TILE_MODE13},
+ {mmGB_TILE_MODE14},
+ {mmGB_TILE_MODE15},
+ {mmGB_TILE_MODE16},
+ {mmGB_TILE_MODE17},
+ {mmGB_TILE_MODE18},
+ {mmGB_TILE_MODE19},
+ {mmGB_TILE_MODE20},
+ {mmGB_TILE_MODE21},
+ {mmGB_TILE_MODE22},
+ {mmGB_TILE_MODE23},
+ {mmGB_TILE_MODE24},
+ {mmGB_TILE_MODE25},
+ {mmGB_TILE_MODE26},
+ {mmGB_TILE_MODE27},
+ {mmGB_TILE_MODE28},
+ {mmGB_TILE_MODE29},
+ {mmGB_TILE_MODE30},
+ {mmGB_TILE_MODE31},
+ {mmCC_RB_BACKEND_DISABLE, true},
+ {mmGC_USER_RB_BACKEND_DISABLE, true},
+ {mmPA_SC_RASTER_CONFIG, true},
};
static uint32_t si_get_register_value(struct amdgpu_device *adev,
@@ -1071,12 +1188,12 @@ static uint32_t si_get_register_value(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
} else {
@@ -1154,37 +1271,37 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev)
u32 rom_cntl;
bool r;
- bus_cntl = RREG32(R600_BUS_CNTL);
+ bus_cntl = RREG32(mmBUS_CNTL);
if (adev->mode_info.num_crtc) {
- d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
- d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
- vga_render_control = RREG32(VGA_RENDER_CONTROL);
+ d1vga_control = RREG32(mmD1VGA_CONTROL);
+ d2vga_control = RREG32(mmD2VGA_CONTROL);
+ vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
}
rom_cntl = RREG32(R600_ROM_CNTL);
/* enable the rom */
- WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS));
+ WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK));
if (adev->mode_info.num_crtc) {
/* Disable VGA mode */
- WREG32(AVIVO_D1VGA_CONTROL,
- (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
- AVIVO_DVGA_CONTROL_TIMING_SELECT)));
- WREG32(AVIVO_D2VGA_CONTROL,
- (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
- AVIVO_DVGA_CONTROL_TIMING_SELECT)));
- WREG32(VGA_RENDER_CONTROL,
- (vga_render_control & C_000300_VGA_VSTATUS_CNTL));
+ WREG32(mmD1VGA_CONTROL,
+ (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmD2VGA_CONTROL,
+ (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmVGA_RENDER_CONTROL,
+ (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK));
}
WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE);
r = amdgpu_read_bios(adev);
/* restore regs */
- WREG32(R600_BUS_CNTL, bus_cntl);
+ WREG32(mmBUS_CNTL, bus_cntl);
if (adev->mode_info.num_crtc) {
- WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
- WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
- WREG32(VGA_RENDER_CONTROL, vga_render_control);
+ WREG32(mmD1VGA_CONTROL, d1vga_control);
+ WREG32(mmD2VGA_CONTROL, d2vga_control);
+ WREG32(mmVGA_RENDER_CONTROL, vga_render_control);
}
WREG32(R600_ROM_CNTL, rom_cntl);
return r;
@@ -1221,23 +1338,24 @@ static void si_set_clk_bypass_mode(struct amdgpu_device *adev)
{
u32 tmp, i;
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_BYPASS_EN;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
- tmp |= SPLL_CTLREQ_CHG;
- WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2);
+ tmp |= CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp);
for (i = 0; i < adev->usec_timeout; i++) {
- if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
+ if (RREG32(mmCG_SPLL_STATUS) & CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK)
break;
udelay(1);
}
- tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
- tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
- WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2);
+ tmp &= ~(CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK |
+ CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK);
+ WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp);
tmp = RREG32(MPLL_CNTL_MODE);
tmp &= ~MPLL_MCLK_SEL;
@@ -1248,21 +1366,21 @@ static void si_spll_powerdown(struct amdgpu_device *adev)
{
u32 tmp;
- tmp = RREG32(SPLL_CNTL_MODE);
- tmp |= SPLL_SW_DIR_CONTROL;
- WREG32(SPLL_CNTL_MODE, tmp);
+ tmp = RREG32(mmSPLL_CNTL_MODE);
+ tmp |= SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK;
+ WREG32(mmSPLL_CNTL_MODE, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_RESET;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_SLEEP;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(SPLL_CNTL_MODE);
- tmp &= ~SPLL_SW_DIR_CONTROL;
- WREG32(SPLL_CNTL_MODE, tmp);
+ tmp = RREG32(mmSPLL_CNTL_MODE);
+ tmp &= ~SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK;
+ WREG32(mmSPLL_CNTL_MODE, tmp);
}
static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
@@ -1299,9 +1417,9 @@ static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool si_asic_supports_baco(struct amdgpu_device *adev)
+static int si_asic_supports_baco(struct amdgpu_device *adev)
{
- return false;
+ return 0;
}
static enum amd_reset_method
@@ -1344,14 +1462,14 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state)
{
uint32_t temp;
- temp = RREG32(CONFIG_CNTL);
+ temp = RREG32(mmCONFIG_CNTL);
if (!state) {
temp &= ~(1<<0);
temp |= (1<<1);
} else {
temp &= ~(1<<1);
}
- WREG32(CONFIG_CNTL, temp);
+ WREG32(mmCONFIG_CNTL, temp);
}
static u32 si_get_xclk(struct amdgpu_device *adev)
@@ -1359,12 +1477,12 @@ static u32 si_get_xclk(struct amdgpu_device *adev)
u32 reference_clock = adev->clock.spll.reference_freq;
u32 tmp;
- tmp = RREG32(CG_CLKPIN_CNTL_2);
- if (tmp & MUX_TCLK_TO_XCLK)
+ tmp = RREG32(mmCG_CLKPIN_CNTL_2);
+ if (tmp & CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK)
return TCLK;
- tmp = RREG32(CG_CLKPIN_CNTL);
- if (tmp & XTALIN_DIVIDE)
+ tmp = RREG32(mmCG_CLKPIN_CNTL);
+ if (tmp & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK)
return reference_clock / 4;
return reference_clock;
@@ -1409,9 +1527,9 @@ static int si_get_pcie_lanes(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return 0;
- link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
- switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) {
+ switch ((link_width_cntl & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT) {
case LC_LINK_WIDTH_X1:
return 1;
case LC_LINK_WIDTH_X2:
@@ -1458,13 +1576,13 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
return;
}
- link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- link_width_cntl &= ~LC_LINK_WIDTH_MASK;
- link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT;
- link_width_cntl |= (LC_RECONFIG_NOW |
- LC_RECONFIG_ARC_MISSING_ESCAPE);
+ link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK;
+ link_width_cntl |= mask << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT;
+ link_width_cntl |= (PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_ARC_MISSING_ESCAPE_MASK);
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
}
static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
@@ -1778,7 +1896,7 @@ static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev)
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
- DRM_ERROR("Timeout setting UVD clocks!\n");
+ DRM_ERROR("Timeout setting VCE clocks!\n");
return -ETIMEDOUT;
}
@@ -1800,6 +1918,14 @@ static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
~VCEPLL_BYPASS_EN_MASK);
if (!evclk || !ecclk) {
+ /*
+ * On some chips, the PLL takes way too long to get out of
+ * sleep mode, causing a timeout waiting on CTLACK/CTLACK2.
+ * Leave the PLL running in bypass mode.
+ */
+ if (adev->pdev->device == 0x6780)
+ return 0;
+
/* Keep the Bypass mode, put PLL to sleep */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
~VCEPLL_SLEEP_MASK);
@@ -1903,17 +2029,18 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.get_pcie_replay_count = &si_get_pcie_replay_count,
.supports_baco = &si_asic_supports_baco,
.pre_asic_init = &si_pre_asic_init,
+ .query_video_codecs = &si_query_video_codecs,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
{
- return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
+ return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
>> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
}
-static int si_common_early_init(void *handle)
+static int si_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->smc_rreg = &si_smc_rreg;
adev->smc_wreg = &si_smc_wreg;
@@ -2037,17 +2164,6 @@ static int si_common_early_init(void *handle)
return 0;
}
-static int si_common_sw_init(void *handle)
-{
- return 0;
-}
-
-static int si_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-
static void si_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -2139,9 +2255,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
return;
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
- LC_CURRENT_DATA_RATE_SHIFT;
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >>
+ PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate == 2) {
DRM_INFO("PCIE gen 3 link speeds already enabled\n");
@@ -2165,29 +2281,20 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
- &bridge_cfg);
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
- &gpu_cfg);
-
- tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
- tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
- tmp16);
-
- tmp = RREG32_PCIE(PCIE_LC_STATUS1);
- max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
- current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+ tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT;
+ current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT;
if (current_lw < max_lw) {
- tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- if (tmp & LC_RENEGOTIATION_SUPPORT) {
- tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
- tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
- tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) {
+ tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK);
+ tmp |= (max_lw << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT);
+ tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, tmp);
}
}
@@ -2210,84 +2317,66 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
PCI_EXP_LNKCTL2,
&gpu_cfg2);
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp |= LC_SET_QUIESCE;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp |= LC_REDO_EQ;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
mdelay(100);
- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
- &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
- tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pcie_capability_write_word(root, PCI_EXP_LNKCTL,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL,
- &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
- tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL,
- tmp16);
-
- pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (bridge_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(root,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (gpu_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp &= ~LC_SET_QUIESCE;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ bridge_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ gpu_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
}
}
}
- speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
- speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
- WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
-
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK | PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK;
+ speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl);
+ tmp16 = 0;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS, tmp16);
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
- WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl);
for (i = 0; i < adev->usec_timeout; i++) {
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0)
break;
udelay(1);
}
@@ -2342,126 +2431,124 @@ static void si_program_aspm(struct amdgpu_device *adev)
bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
bool disable_clkreq = false;
- if (amdgpu_aspm == 0)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (adev->flags & AMD_IS_APU)
- return;
- orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
- data &= ~LC_XMIT_N_FTS_MASK;
- data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL);
+ data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
+ data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL, data);
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
- data |= LC_GO_TO_RECOVERY;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL3, data);
- orig = data = RREG32_PCIE(PCIE_P_CNTL);
- data |= P_IGNORE_EDB_ERR;
+ orig = data = RREG32_PCIE(ixPCIE_P_CNTL);
+ data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK;
if (orig != data)
- WREG32_PCIE(PCIE_P_CNTL, data);
+ WREG32_PCIE(ixPCIE_P_CNTL, data);
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
- data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
- data |= LC_PMI_TO_L1_DIS;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL);
+ data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK);
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (!disable_l0s)
- data |= LC_L0S_INACTIVITY(7);
+ data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT);
if (!disable_l1) {
- data |= LC_L1_INACTIVITY(7);
- data &= ~LC_PMI_TO_L1_DIS;
+ data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT);
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
if (!disable_plloff_in_l1) {
bool clk_req_support;
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
- data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
- data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0);
+ data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
- data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
- data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1);
+ data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
- data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
- data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0);
+ data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
- data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
- data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1);
+ data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data);
if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) {
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
- data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0);
+ data &= ~PB0_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
- data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1);
+ data &= ~PB0_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2);
- data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_2);
+ data &= ~PB0_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_2, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3);
- data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_3);
+ data &= ~PB0_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_3, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
- data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0);
+ data &= ~PB1_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
- data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1);
+ data &= ~PB1_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2);
- data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_2);
+ data &= ~PB1_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_2, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3);
- data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_3);
+ data &= ~PB1_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_3, data);
}
- orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- data &= ~LC_DYN_LANES_PWR_STATE_MASK;
- data |= LC_DYN_LANES_PWR_STATE(3);
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK;
+ data |= (3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT);
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL);
- data &= ~LS2_EXIT_TIME_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_CNTL);
+ data &= ~PB0_PIF_CNTL__LS2_EXIT_TIME_MASK;
if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
- data |= LS2_EXIT_TIME(5);
+ data |= (5 << PB0_PIF_CNTL__LS2_EXIT_TIME__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_CNTL, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL);
- data &= ~LS2_EXIT_TIME_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_CNTL);
+ data &= ~PB1_PIF_CNTL__LS2_EXIT_TIME_MASK;
if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
- data |= LS2_EXIT_TIME(5);
+ data |= (5 << PB1_PIF_CNTL__LS2_EXIT_TIME__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_CNTL, data);
if (!disable_clkreq &&
!pci_is_root_bus(adev->pdev->bus)) {
@@ -2477,64 +2564,64 @@ static void si_program_aspm(struct amdgpu_device *adev)
}
if (clk_req_support) {
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
- data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL2);
+ data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL2, data);
- orig = data = RREG32(THM_CLK_CNTL);
- data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
- data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+ orig = data = RREG32(mmTHM_CLK_CNTL);
+ data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK);
+ data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT);
if (orig != data)
- WREG32(THM_CLK_CNTL, data);
+ WREG32(mmTHM_CLK_CNTL, data);
- orig = data = RREG32(MISC_CLK_CNTL);
- data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
- data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+ orig = data = RREG32(mmMISC_CLK_CNTL);
+ data &= ~(MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK | MISC_CLK_CNTL__ZCLK_SEL_MASK);
+ data |= (1 << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT) | (1 << MISC_CLK_CNTL__ZCLK_SEL__SHIFT);
if (orig != data)
- WREG32(MISC_CLK_CNTL, data);
+ WREG32(mmMISC_CLK_CNTL, data);
- orig = data = RREG32(CG_CLKPIN_CNTL);
- data &= ~BCLK_AS_XCLK;
+ orig = data = RREG32(mmCG_CLKPIN_CNTL);
+ data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK;
if (orig != data)
- WREG32(CG_CLKPIN_CNTL, data);
+ WREG32(mmCG_CLKPIN_CNTL, data);
- orig = data = RREG32(CG_CLKPIN_CNTL_2);
- data &= ~FORCE_BIF_REFCLK_EN;
+ orig = data = RREG32(mmCG_CLKPIN_CNTL_2);
+ data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK;
if (orig != data)
- WREG32(CG_CLKPIN_CNTL_2, data);
+ WREG32(mmCG_CLKPIN_CNTL_2, data);
- orig = data = RREG32(MPLL_BYPASSCLK_SEL);
- data &= ~MPLL_CLKOUT_SEL_MASK;
- data |= MPLL_CLKOUT_SEL(4);
+ orig = data = RREG32(mmMPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK;
+ data |= 4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT;
if (orig != data)
- WREG32(MPLL_BYPASSCLK_SEL, data);
+ WREG32(mmMPLL_BYPASSCLK_SEL, data);
- orig = data = RREG32(SPLL_CNTL_MODE);
- data &= ~SPLL_REFCLK_SEL_MASK;
+ orig = data = RREG32(mmSPLL_CNTL_MODE);
+ data &= ~SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK;
if (orig != data)
- WREG32(SPLL_CNTL_MODE, data);
+ WREG32(mmSPLL_CNTL_MODE, data);
}
}
} else {
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
}
- orig = data = RREG32_PCIE(PCIE_CNTL2);
- data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+ orig = data = RREG32_PCIE(ixPCIE_CNTL2);
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK;
if (orig != data)
- WREG32_PCIE(PCIE_CNTL2, data);
+ WREG32_PCIE(ixPCIE_CNTL2, data);
if (!disable_l0s) {
- data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
- if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
- data = RREG32_PCIE(PCIE_LC_STATUS1);
- if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
- data &= ~LC_L0S_INACTIVITY_MASK;
+ data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL);
+ if((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) {
+ data = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) && (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) {
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
}
}
}
@@ -2551,9 +2638,9 @@ static void si_fix_pci_max_read_req_size(struct amdgpu_device *adev)
pcie_set_readrq(adev->pdev, 512);
}
-static int si_common_hw_init(void *handle)
+static int si_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
si_fix_pci_max_read_req_size(adev);
si_init_golden_registers(adev);
@@ -2563,47 +2650,28 @@ static int si_common_hw_init(void *handle)
return 0;
}
-static int si_common_hw_fini(void *handle)
+static int si_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int si_common_suspend(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_common_hw_fini(adev);
-}
-
-static int si_common_resume(void *handle)
+static int si_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_common_hw_init(adev);
+ return si_common_hw_init(ip_block);
}
-static bool si_common_is_idle(void *handle)
+static bool si_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int si_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int si_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int si_common_set_clockgating_state(void *handle,
+static int si_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_common_set_powergating_state(void *handle,
+static int si_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2612,16 +2680,10 @@ static int si_common_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_common_ip_funcs = {
.name = "si_common",
.early_init = si_common_early_init,
- .late_init = NULL,
- .sw_init = si_common_sw_init,
- .sw_fini = si_common_sw_fini,
.hw_init = si_common_hw_init,
.hw_fini = si_common_hw_fini,
- .suspend = si_common_suspend,
.resume = si_common_resume,
.is_idle = si_common_is_idle,
- .wait_for_idle = si_common_wait_for_idle,
- .soft_reset = si_common_soft_reset,
.set_clockgating_state = si_common_set_clockgating_state,
.set_powergating_state = si_common_set_powergating_state,
};
@@ -2648,7 +2710,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
#if defined(CONFIG_DRM_AMD_DC) && defined(CONFIG_DRM_AMD_DC_SI)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
@@ -2656,7 +2718,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
else
amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
- /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
+ amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block);
break;
case CHIP_OLAND:
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
@@ -2666,7 +2728,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
#if defined(CONFIG_DRM_AMD_DC) && defined(CONFIG_DRM_AMD_DC_SI)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
@@ -2674,7 +2736,6 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
else
amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
- /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
case CHIP_HAINAN:
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
@@ -2684,7 +2745,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
break;
default:
BUG();
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 488497ad5e0c..7f18e4875287 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -27,6 +27,8 @@
#include "si.h"
#include "sid.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
DMA0_REGISTER_OFFSET,
@@ -38,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
+/**
+ * si_dma_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (SI).
+ */
static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs>>2];
+ return *ring->rptr_cpu_addr;
}
+/**
+ * si_dma_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (SI).
+ */
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
+ return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}
static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
@@ -56,8 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(DMA_RB_WPTR + sdma_offsets[me],
- (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
@@ -113,19 +128,14 @@ static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
static void si_dma_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
u32 rb_cntl;
unsigned i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
/* dma0 */
- rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
- rb_cntl &= ~DMA_RB_ENABLE;
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
}
}
@@ -139,53 +149,48 @@ static int si_dma_start(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
- WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
/* Set ring buffer size in dwords */
rb_bufsz = order_base_2(ring->ring_size / 4);
rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
- rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK;
#endif
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
/* Initialize the ring buffer's read and write pointers */
- WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
- WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);
+ WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0);
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
- WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
- WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
+ WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
+ WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
- rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+ rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;
- WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+ WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
/* enable DMA IBs */
- ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
+ ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK;
#ifdef __BIG_ENDIAN
- ib_cntl |= DMA_IB_SWAP_ENABLE;
+ ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK;
#endif
- WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);
+ WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
- dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
- dma_cntl &= ~CTXEMPTY_INT_ENABLE;
- WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
+ dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]);
+ dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl);
ring->wptr = 0;
- WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
-
- ring->sched.ready = true;
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK);
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -297,7 +302,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -305,7 +310,7 @@ err0:
}
/**
- * cik_dma_vm_copy_pte - update PTEs by copying them from the GART
+ * si_dma_vm_copy_pte - update PTEs by copying them from the GART
*
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
@@ -402,7 +407,7 @@ static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
}
/**
- * si_dma_pad_ib - pad the IB to the required number of dw
+ * si_dma_ring_pad_ib - pad the IB to the required number of dw
*
* @ring: amdgpu_ring pointer
* @ib: indirect buffer to fill with padding
@@ -415,7 +420,7 @@ static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
}
/**
- * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
+ * si_dma_ring_emit_pipeline_sync - sync the pipeline
*
* @ring: amdgpu_ring pointer
*
@@ -468,11 +473,11 @@ static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int si_dma_early_init(void *handle)
+static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->sdma.num_instances = 2;
+ adev->sdma.num_instances = SDMA_MAX_INSTANCE;
si_dma_set_ring_funcs(adev);
si_dma_set_buffer_funcs(adev);
@@ -482,11 +487,11 @@ static int si_dma_early_init(void *handle)
return 0;
}
-static int si_dma_sw_init(void *handle)
+static int si_dma_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* DMA0 trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
@@ -507,10 +512,9 @@ static int si_dma_sw_init(void *handle)
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
- (i == 0) ?
- AMDGPU_SDMA_IRQ_INSTANCE0 :
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
AMDGPU_SDMA_IRQ_INSTANCE1,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -518,9 +522,9 @@ static int si_dma_sw_init(void *handle)
return r;
}
-static int si_dma_sw_fini(void *handle)
+static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -529,61 +533,56 @@ static int si_dma_sw_fini(void *handle)
return 0;
}
-static int si_dma_hw_init(void *handle)
+static int si_dma_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return si_dma_start(adev);
}
-static int si_dma_hw_fini(void *handle)
+static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- si_dma_stop(adev);
+ si_dma_stop(ip_block->adev);
return 0;
}
-static int si_dma_suspend(void *handle)
+static int si_dma_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_dma_hw_fini(adev);
+ return si_dma_hw_fini(ip_block);
}
-static int si_dma_resume(void *handle)
+static int si_dma_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_dma_hw_init(adev);
+ return si_dma_hw_init(ip_block);
}
-static bool si_dma_is_idle(void *handle)
+static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(SRBM_STATUS2);
+ struct amdgpu_device *adev = ip_block->adev;
+
+ u32 tmp = RREG32(mmSRBM_STATUS2);
- if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
+ if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK))
return false;
return true;
}
-static int si_dma_wait_for_idle(void *handle)
+static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (si_dma_is_idle(handle))
+ if (si_dma_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int si_dma_soft_reset(void *handle)
+static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block)
{
DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
return 0;
@@ -600,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
case AMDGPU_SDMA_IRQ_INSTANCE0:
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
- sdma_cntl &= ~TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
- sdma_cntl |= TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
break;
default:
break;
@@ -616,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
case AMDGPU_SDMA_IRQ_INSTANCE1:
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
- sdma_cntl &= ~TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
- sdma_cntl |= TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
break;
default:
break;
@@ -646,13 +645,13 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
-static int si_dma_set_clockgating_state(void *handle,
+static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
u32 orig, data, offset;
int i;
bool enable;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
enable = (state == AMD_CG_STATE_GATE);
@@ -662,11 +661,11 @@ static int si_dma_set_clockgating_state(void *handle,
offset = DMA0_REGISTER_OFFSET;
else
offset = DMA1_REGISTER_OFFSET;
- orig = data = RREG32(DMA_POWER_CNTL + offset);
- data &= ~MEM_POWER_OVERRIDE;
+ orig = data = RREG32(mmDMA_POWER_CNTL + offset);
+ data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (data != orig)
- WREG32(DMA_POWER_CNTL + offset, data);
- WREG32(DMA_CLK_CTRL + offset, 0x00000100);
+ WREG32(mmDMA_POWER_CNTL + offset, data);
+ WREG32(mmDMA_CLK_CTRL + offset, 0x00000100);
}
} else {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -674,33 +673,33 @@ static int si_dma_set_clockgating_state(void *handle,
offset = DMA0_REGISTER_OFFSET;
else
offset = DMA1_REGISTER_OFFSET;
- orig = data = RREG32(DMA_POWER_CNTL + offset);
- data |= MEM_POWER_OVERRIDE;
+ orig = data = RREG32(mmDMA_POWER_CNTL + offset);
+ data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (data != orig)
- WREG32(DMA_POWER_CNTL + offset, data);
+ WREG32(mmDMA_POWER_CNTL + offset, data);
- orig = data = RREG32(DMA_CLK_CTRL + offset);
+ orig = data = RREG32(mmDMA_CLK_CTRL + offset);
data = 0xff000000;
if (data != orig)
- WREG32(DMA_CLK_CTRL + offset, data);
+ WREG32(mmDMA_CLK_CTRL + offset, data);
}
}
return 0;
}
-static int si_dma_set_powergating_state(void *handle,
+static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- WREG32(DMA_PGFSM_WRITE, 0x00002000);
- WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
+ WREG32(mmDMA_PGFSM_WRITE, 0x00002000);
+ WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff);
for (tmp = 0; tmp < 5; tmp++)
- WREG32(DMA_PGFSM_WRITE, 0);
+ WREG32(mmDMA_PGFSM_WRITE, 0);
return 0;
}
@@ -708,7 +707,6 @@ static int si_dma_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_dma_ip_funcs = {
.name = "si_dma",
.early_init = si_dma_early_init,
- .late_init = NULL,
.sw_init = si_dma_sw_init,
.sw_fini = si_dma_sw_fini,
.hw_init = si_dma_hw_init,
@@ -773,7 +771,7 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: is this a secure operation
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -783,7 +781,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
1, 0, 0, byte_count);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
index 4e935baa7b91..6da65778292b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_enums.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
@@ -23,123 +23,15 @@
#ifndef SI_ENUMS_H
#define SI_ENUMS_H
-#define VBLANK_INT_MASK (1 << 0)
-#define DC_HPDx_INT_EN (1 << 16)
-#define VBLANK_ACK (1 << 4)
-#define VLINE_ACK (1 << 4)
-
-#define CURSOR_WIDTH 64
-#define CURSOR_HEIGHT 64
-
-#define VGA_VSTATUS_CNTL 0xFFFCFFFF
#define PRIORITY_MARK_MASK 0x7fff
#define PRIORITY_OFF (1 << 16)
#define PRIORITY_ALWAYS_ON (1 << 20)
-#define INTERLEAVE_EN (1 << 0)
-
-#define LATENCY_WATERMARK_MASK(x) ((x) << 16)
-#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
-#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
-
-#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
-#define GRPH_ENDIAN_NONE 0
-#define GRPH_ENDIAN_8IN16 1
-#define GRPH_ENDIAN_8IN32 2
-#define GRPH_ENDIAN_8IN64 3
-#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
-#define GRPH_RED_SEL_R 0
-#define GRPH_RED_SEL_G 1
-#define GRPH_RED_SEL_B 2
-#define GRPH_RED_SEL_A 3
-#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
-#define GRPH_GREEN_SEL_G 0
-#define GRPH_GREEN_SEL_B 1
-#define GRPH_GREEN_SEL_A 2
-#define GRPH_GREEN_SEL_R 3
-#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
-#define GRPH_BLUE_SEL_B 0
-#define GRPH_BLUE_SEL_A 1
-#define GRPH_BLUE_SEL_R 2
-#define GRPH_BLUE_SEL_G 3
-#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
-#define GRPH_ALPHA_SEL_A 0
-#define GRPH_ALPHA_SEL_R 1
-#define GRPH_ALPHA_SEL_G 2
-#define GRPH_ALPHA_SEL_B 3
-
-#define GRPH_DEPTH(x) (((x) & 0x3) << 0)
-#define GRPH_DEPTH_8BPP 0
-#define GRPH_DEPTH_16BPP 1
-#define GRPH_DEPTH_32BPP 2
-
-#define GRPH_FORMAT(x) (((x) & 0x7) << 8)
-#define GRPH_FORMAT_INDEXED 0
-#define GRPH_FORMAT_ARGB1555 0
-#define GRPH_FORMAT_ARGB565 1
-#define GRPH_FORMAT_ARGB4444 2
-#define GRPH_FORMAT_AI88 3
-#define GRPH_FORMAT_MONO16 4
-#define GRPH_FORMAT_BGRA5551 5
-#define GRPH_FORMAT_ARGB8888 0
-#define GRPH_FORMAT_ARGB2101010 1
-#define GRPH_FORMAT_32BPP_DIG 2
-#define GRPH_FORMAT_8B_ARGB2101010 3
-#define GRPH_FORMAT_BGRA1010102 4
-#define GRPH_FORMAT_8B_BGRA1010102 5
-#define GRPH_FORMAT_RGB111110 6
-#define GRPH_FORMAT_BGR101111 7
-
-#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
-#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define GRPH_ARRAY_LINEAR_GENERAL 0
-#define GRPH_ARRAY_LINEAR_ALIGNED 1
-#define GRPH_ARRAY_1D_TILED_THIN1 2
-#define GRPH_ARRAY_2D_TILED_THIN1 4
-#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
-#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
-#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
-#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
-#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24)
-
-#define CURSOR_EN (1 << 0)
-#define CURSOR_MODE(x) (((x) & 0x3) << 8)
-#define CURSOR_MONO 0
-#define CURSOR_24_1 1
-#define CURSOR_24_8_PRE_MULT 2
-#define CURSOR_24_8_UNPRE_MULT 3
-#define CURSOR_2X_MAGNIFY (1 << 16)
-#define CURSOR_FORCE_MC_ON (1 << 20)
-#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
-#define CURSOR_URGENT_ALWAYS 0
-#define CURSOR_URGENT_1_8 1
-#define CURSOR_URGENT_1_4 2
-#define CURSOR_URGENT_3_8 3
-#define CURSOR_URGENT_1_2 4
-#define CURSOR_UPDATE_PENDING (1 << 0)
-#define CURSOR_UPDATE_TAKEN (1 << 1)
-#define CURSOR_UPDATE_LOCK (1 << 16)
-#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
-
-#define SI_CRTC0_REGISTER_OFFSET 0
-#define SI_CRTC1_REGISTER_OFFSET 0x300
-#define SI_CRTC2_REGISTER_OFFSET 0x2600
-#define SI_CRTC3_REGISTER_OFFSET 0x2900
-#define SI_CRTC4_REGISTER_OFFSET 0x2c00
-#define SI_CRTC5_REGISTER_OFFSET 0x2f00
-
-#define DMA0_REGISTER_OFFSET 0x000
-#define DMA1_REGISTER_OFFSET 0x200
-#define ES_AND_GS_AUTO 3
-#define RADEON_PACKET_TYPE3 3
-#define CE_PARTITION_BASE 3
-#define BUF_SWAP_32BIT (2 << 16)
#define GFX_POWER_STATUS (1 << 1)
#define GFX_CLOCK_STATUS (1 << 2)
#define GFX_LS_STATUS (1 << 3)
-#define RLC_BUSY_STATUS (1 << 0)
+#define RLC_BUSY_STATUS (1 << 0)
#define RLC_PUD(x) ((x) << 0)
#define RLC_PUD_MASK (0xff << 0)
#define RLC_PDD(x) ((x) << 8)
@@ -148,144 +40,8 @@
#define RLC_TTPD_MASK (0xff << 16)
#define RLC_MSD(x) ((x) << 24)
#define RLC_MSD_MASK (0xff << 24)
-#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
-#define WRITE_DATA_DST_SEL(x) ((x) << 8)
-#define EVENT_TYPE(x) ((x) << 0)
-#define EVENT_INDEX(x) ((x) << 8)
-#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
-#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
-#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
-#define GFX6_NUM_GFX_RINGS 1
-#define GFX6_NUM_COMPUTE_RINGS 2
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
-#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x02010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02011003
-
-#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \
- (((op) & 0xFF) << 8) | \
- ((n) & 0x3FFF) << 16)
-#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
-#define PACKET3_NOP 0x10
-#define PACKET3_SET_BASE 0x11
-#define PACKET3_BASE_INDEX(x) ((x) << 0)
-#define PACKET3_CLEAR_STATE 0x12
-#define PACKET3_INDEX_BUFFER_SIZE 0x13
-#define PACKET3_DISPATCH_DIRECT 0x15
-#define PACKET3_DISPATCH_INDIRECT 0x16
-#define PACKET3_ALLOC_GDS 0x1B
-#define PACKET3_WRITE_GDS_RAM 0x1C
-#define PACKET3_ATOMIC_GDS 0x1D
-#define PACKET3_ATOMIC 0x1E
-#define PACKET3_OCCLUSION_QUERY 0x1F
-#define PACKET3_SET_PREDICATION 0x20
-#define PACKET3_REG_RMW 0x21
-#define PACKET3_COND_EXEC 0x22
-#define PACKET3_PRED_EXEC 0x23
-#define PACKET3_DRAW_INDIRECT 0x24
-#define PACKET3_DRAW_INDEX_INDIRECT 0x25
-#define PACKET3_INDEX_BASE 0x26
-#define PACKET3_DRAW_INDEX_2 0x27
-#define PACKET3_CONTEXT_CONTROL 0x28
-#define PACKET3_INDEX_TYPE 0x2A
-#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
-#define PACKET3_DRAW_INDEX_AUTO 0x2D
-#define PACKET3_DRAW_INDEX_IMMD 0x2E
-#define PACKET3_NUM_INSTANCES 0x2F
-#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
-#define PACKET3_INDIRECT_BUFFER_CONST 0x31
-#define PACKET3_INDIRECT_BUFFER 0x3F
-#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
-#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
-#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36
-#define PACKET3_WRITE_DATA 0x37
-#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
-#define PACKET3_MEM_SEMAPHORE 0x39
-#define PACKET3_MPEG_INDEX 0x3A
-#define PACKET3_COPY_DW 0x3B
-#define PACKET3_WAIT_REG_MEM 0x3C
-#define PACKET3_MEM_WRITE 0x3D
-#define PACKET3_COPY_DATA 0x40
-#define PACKET3_CP_DMA 0x41
-# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
-# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
-# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
-# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
-# define PACKET3_CP_DMA_DIS_WC (1 << 21)
-# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
-# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
-# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
-# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
-# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
-# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
-# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
-#define PACKET3_PFP_SYNC_ME 0x42
-#define PACKET3_SURFACE_SYNC 0x43
-# define PACKET3_DEST_BASE_0_ENA (1 << 0)
-# define PACKET3_DEST_BASE_1_ENA (1 << 1)
-# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
-# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
-# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
-# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
-# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
-# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
-# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
-# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
-# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
-# define PACKET3_DEST_BASE_2_ENA (1 << 19)
-# define PACKET3_DEST_BASE_3_ENA (1 << 21)
-# define PACKET3_TCL1_ACTION_ENA (1 << 22)
-# define PACKET3_TC_ACTION_ENA (1 << 23)
-# define PACKET3_CB_ACTION_ENA (1 << 25)
-# define PACKET3_DB_ACTION_ENA (1 << 26)
-# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
-# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
-#define PACKET3_ME_INITIALIZE 0x44
-#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
-#define PACKET3_COND_WRITE 0x45
-#define PACKET3_EVENT_WRITE 0x46
-#define PACKET3_EVENT_WRITE_EOP 0x47
-#define PACKET3_EVENT_WRITE_EOS 0x48
-#define PACKET3_PREAMBLE_CNTL 0x4A
-# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
-# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
-#define PACKET3_ONE_REG_WRITE 0x57
-#define PACKET3_LOAD_CONFIG_REG 0x5F
-#define PACKET3_LOAD_CONTEXT_REG 0x60
-#define PACKET3_LOAD_SH_REG 0x61
-#define PACKET3_SET_CONFIG_REG 0x68
-#define PACKET3_SET_CONFIG_REG_START 0x00002000
-#define PACKET3_SET_CONFIG_REG_END 0x00002c00
-#define PACKET3_SET_CONTEXT_REG 0x69
-#define PACKET3_SET_CONTEXT_REG_START 0x000a000
-#define PACKET3_SET_CONTEXT_REG_END 0x000a400
-#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
-#define PACKET3_SET_RESOURCE_INDIRECT 0x74
-#define PACKET3_SET_SH_REG 0x76
-#define PACKET3_SET_SH_REG_START 0x00002c00
-#define PACKET3_SET_SH_REG_END 0x00003000
-#define PACKET3_SET_SH_REG_OFFSET 0x77
-#define PACKET3_ME_WRITE 0x7A
-#define PACKET3_SCRATCH_RAM_WRITE 0x7D
-#define PACKET3_SCRATCH_RAM_READ 0x7E
-#define PACKET3_CE_WRITE 0x7F
-#define PACKET3_LOAD_CONST_RAM 0x80
-#define PACKET3_WRITE_CONST_RAM 0x81
-#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82
-#define PACKET3_DUMP_CONST_RAM 0x83
-#define PACKET3_INCREMENT_CE_COUNTER 0x84
-#define PACKET3_INCREMENT_DE_COUNTER 0x85
-#define PACKET3_WAIT_ON_CE_COUNTER 0x86
-#define PACKET3_WAIT_ON_DE_COUNTER 0x87
-#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
-#define PACKET3_SET_CE_DE_COUNTERS 0x89
-#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
-#define PACKET3_SWITCH_BUFFER 0x8B
-#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
-#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
-#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 51880f6ef634..66f650f87243 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -27,6 +27,7 @@
#include "amdgpu_ih.h"
#include "sid.h"
#include "si_ih.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
@@ -95,6 +96,9 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
pci_set_master(adev->pdev);
si_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -111,6 +115,9 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
@@ -119,7 +126,15 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(IH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(IH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(IH_RB_CNTL, tmp);
}
+
+out:
return (wptr & ih->ptr_mask);
}
@@ -150,71 +165,68 @@ static void si_ih_set_rptr(struct amdgpu_device *adev,
WREG32(IH_RB_RPTR, ih->rptr);
}
-static int si_ih_early_init(void *handle)
+static int si_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
si_ih_set_interrupt_funcs(adev);
return 0;
}
-static int si_ih_sw_init(void *handle)
+static int si_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
return amdgpu_irq_init(adev);
}
-static int si_ih_sw_fini(void *handle)
+static int si_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_irq_fini_sw(adev);
return 0;
}
-static int si_ih_hw_init(void *handle)
+static int si_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return si_ih_irq_init(adev);
}
-static int si_ih_hw_fini(void *handle)
+static int si_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- si_ih_irq_disable(adev);
+ si_ih_irq_disable(ip_block->adev);
return 0;
}
-static int si_ih_suspend(void *handle)
+static int si_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_ih_hw_fini(adev);
+ return si_ih_hw_fini(ip_block);
}
-static int si_ih_resume(void *handle)
+static int si_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_ih_hw_init(adev);
+ return si_ih_hw_init(ip_block);
}
-static bool si_ih_is_idle(void *handle)
+static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(SRBM_STATUS);
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
return false;
@@ -222,41 +234,41 @@ static bool si_ih_is_idle(void *handle)
return true;
}
-static int si_ih_wait_for_idle(void *handle)
+static int si_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (si_ih_is_idle(handle))
+ if (si_ih_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int si_ih_soft_reset(void *handle)
+static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
- u32 tmp = RREG32(SRBM_STATUS);
+ u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
if (srbm_soft_reset) {
- tmp = RREG32(SRBM_SOFT_RESET);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(SRBM_SOFT_RESET, tmp);
- tmp = RREG32(SRBM_SOFT_RESET);
+ dev_info(adev->dev, "mmSRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
udelay(50);
tmp &= ~srbm_soft_reset;
- WREG32(SRBM_SOFT_RESET, tmp);
- tmp = RREG32(SRBM_SOFT_RESET);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
udelay(50);
}
@@ -264,13 +276,13 @@ static int si_ih_soft_reset(void *handle)
return 0;
}
-static int si_ih_set_clockgating_state(void *handle,
+static int si_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_ih_set_powergating_state(void *handle,
+static int si_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -279,7 +291,6 @@ static int si_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_ih_ip_funcs = {
.name = "si_ih",
.early_init = si_ih_early_init,
- .late_init = NULL,
.sw_init = si_ih_sw_init,
.sw_fini = si_ih_sw_fini,
.hw_init = si_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
index 9a39cbfe6db9..561462a8332e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sid.h
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -24,47 +24,12 @@
#ifndef SI_H
#define SI_H
-#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2
-
-#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
-
-#define SI_MAX_SH_GPRS 256
-#define SI_MAX_TEMP_GPRS 16
-#define SI_MAX_SH_THREADS 256
-#define SI_MAX_SH_STACK_ENTRIES 4096
-#define SI_MAX_FRC_EOV_CNT 16384
-#define SI_MAX_BACKENDS 8
-#define SI_MAX_BACKENDS_MASK 0xFF
-#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F
-#define SI_MAX_SIMDS 12
-#define SI_MAX_SIMDS_MASK 0x0FFF
-#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF
-#define SI_MAX_PIPES 8
-#define SI_MAX_PIPES_MASK 0xFF
-#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F
-#define SI_MAX_LDS_NUM 0xFFFF
-#define SI_MAX_TCC 16
-#define SI_MAX_TCC_MASK 0xFFFF
#define SI_MAX_CTLACKS_ASSERTION_WAIT 100
-/* SMC IND accessor regs */
-#define SMC_IND_INDEX_0 0x80
-#define SMC_IND_DATA_0 0x81
-
-#define SMC_IND_ACCESS_CNTL 0x8A
-# define AUTO_INCREMENT_IND_0 (1 << 0)
-#define SMC_MESSAGE_0 0x8B
-#define SMC_RESP_0 0x8C
-
/* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */
#define SMC_CG_IND_START 0xc0030000
#define SMC_CG_IND_END 0xc0040000
-#define CG_CGTT_LOCAL_0 0x400
-#define CG_CGTT_LOCAL_1 0x401
-
/* SMC IND registers */
#define SMC_SYSCON_RESET_CNTL 0x80000000
# define RST_REG (1 << 0)
@@ -72,9 +37,6 @@
# define CK_DISABLE (1 << 0)
# define CKEN (1 << 24)
-#define VGA_HDP_CONTROL 0xCA
-#define VGA_MEMORY_DISABLE (1 << 4)
-
#define DCCG_DISP_SLOW_SELECT_REG 0x13F
#define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0)
#define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0)
@@ -83,47 +45,6 @@
#define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4)
#define DCCG_DISP2_SLOW_SELECT_SHIFT 4
-#define CG_SPLL_FUNC_CNTL 0x180
-#define SPLL_RESET (1 << 0)
-#define SPLL_SLEEP (1 << 1)
-#define SPLL_BYPASS_EN (1 << 3)
-#define SPLL_REF_DIV(x) ((x) << 4)
-#define SPLL_REF_DIV_MASK (0x3f << 4)
-#define SPLL_PDIV_A(x) ((x) << 20)
-#define SPLL_PDIV_A_MASK (0x7f << 20)
-#define SPLL_PDIV_A_SHIFT 20
-#define CG_SPLL_FUNC_CNTL_2 0x181
-#define SCLK_MUX_SEL(x) ((x) << 0)
-#define SCLK_MUX_SEL_MASK (0x1ff << 0)
-#define SPLL_CTLREQ_CHG (1 << 23)
-#define SCLK_MUX_UPDATE (1 << 26)
-#define CG_SPLL_FUNC_CNTL_3 0x182
-#define SPLL_FB_DIV(x) ((x) << 0)
-#define SPLL_FB_DIV_MASK (0x3ffffff << 0)
-#define SPLL_FB_DIV_SHIFT 0
-#define SPLL_DITHEN (1 << 28)
-#define CG_SPLL_FUNC_CNTL_4 0x183
-
-#define SPLL_STATUS 0x185
-#define SPLL_CHG_STATUS (1 << 1)
-#define SPLL_CNTL_MODE 0x186
-#define SPLL_SW_DIR_CONTROL (1 << 0)
-# define SPLL_REFCLK_SEL(x) ((x) << 26)
-# define SPLL_REFCLK_SEL_MASK (3 << 26)
-
-#define CG_SPLL_SPREAD_SPECTRUM 0x188
-#define SSEN (1 << 0)
-#define CLK_S(x) ((x) << 4)
-#define CLK_S_MASK (0xfff << 4)
-#define CLK_S_SHIFT 4
-#define CG_SPLL_SPREAD_SPECTRUM_2 0x189
-#define CLK_V(x) ((x) << 0)
-#define CLK_V_MASK (0x3ffffff << 0)
-#define CLK_V_SHIFT 0
-
-#define CG_SPLL_AUTOSCALE_CNTL 0x18b
-# define AUTOSCALE_ON_SS_CLEAR (1 << 9)
-
/* discrete uvd clocks */
#define CG_UPLL_FUNC_CNTL 0x18d
# define UPLL_RESET_MASK 0x00000001
@@ -153,317 +74,13 @@
#define CG_UPLL_SPREAD_SPECTRUM 0x194
# define SSEN_MASK 0x00000001
-#define MPLL_BYPASSCLK_SEL 0x197
-# define MPLL_CLKOUT_SEL(x) ((x) << 8)
-# define MPLL_CLKOUT_SEL_MASK 0xFF00
-
-#define CG_CLKPIN_CNTL 0x198
-# define XTALIN_DIVIDE (1 << 1)
-# define BCLK_AS_XCLK (1 << 2)
-#define CG_CLKPIN_CNTL_2 0x199
-# define FORCE_BIF_REFCLK_EN (1 << 3)
-# define MUX_TCLK_TO_XCLK (1 << 8)
-
-#define THM_CLK_CNTL 0x19b
-# define CMON_CLK_SEL(x) ((x) << 0)
-# define CMON_CLK_SEL_MASK 0xFF
-# define TMON_CLK_SEL(x) ((x) << 8)
-# define TMON_CLK_SEL_MASK 0xFF00
-#define MISC_CLK_CNTL 0x19c
-# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0)
-# define DEEP_SLEEP_CLK_SEL_MASK 0xFF
-# define ZCLK_SEL(x) ((x) << 8)
-# define ZCLK_SEL_MASK 0xFF00
-
-#define CG_THERMAL_CTRL 0x1c0
-#define DPM_EVENT_SRC(x) ((x) << 0)
-#define DPM_EVENT_SRC_MASK (7 << 0)
-#define DIG_THERM_DPM(x) ((x) << 14)
-#define DIG_THERM_DPM_MASK 0x003FC000
-#define DIG_THERM_DPM_SHIFT 14
-#define CG_THERMAL_STATUS 0x1c1
-#define FDO_PWM_DUTY(x) ((x) << 9)
-#define FDO_PWM_DUTY_MASK (0xff << 9)
-#define FDO_PWM_DUTY_SHIFT 9
-#define CG_THERMAL_INT 0x1c2
-#define DIG_THERM_INTH(x) ((x) << 8)
-#define DIG_THERM_INTH_MASK 0x0000FF00
-#define DIG_THERM_INTH_SHIFT 8
-#define DIG_THERM_INTL(x) ((x) << 16)
-#define DIG_THERM_INTL_MASK 0x00FF0000
-#define DIG_THERM_INTL_SHIFT 16
-#define THERM_INT_MASK_HIGH (1 << 24)
-#define THERM_INT_MASK_LOW (1 << 25)
-
-#define CG_MULT_THERMAL_CTRL 0x1c4
-#define TEMP_SEL(x) ((x) << 20)
-#define TEMP_SEL_MASK (0xff << 20)
-#define TEMP_SEL_SHIFT 20
-#define CG_MULT_THERMAL_STATUS 0x1c5
-#define ASIC_MAX_TEMP(x) ((x) << 0)
-#define ASIC_MAX_TEMP_MASK 0x000001ff
-#define ASIC_MAX_TEMP_SHIFT 0
-#define CTF_TEMP(x) ((x) << 9)
-#define CTF_TEMP_MASK 0x0003fe00
-#define CTF_TEMP_SHIFT 9
-
-#define CG_FDO_CTRL0 0x1d5
-#define FDO_STATIC_DUTY(x) ((x) << 0)
-#define FDO_STATIC_DUTY_MASK 0x000000FF
-#define FDO_STATIC_DUTY_SHIFT 0
-#define CG_FDO_CTRL1 0x1d6
-#define FMAX_DUTY100(x) ((x) << 0)
-#define FMAX_DUTY100_MASK 0x000000FF
-#define FMAX_DUTY100_SHIFT 0
-#define CG_FDO_CTRL2 0x1d7
-#define TMIN(x) ((x) << 0)
-#define TMIN_MASK 0x000000FF
-#define TMIN_SHIFT 0
-#define FDO_PWM_MODE(x) ((x) << 11)
-#define FDO_PWM_MODE_MASK (7 << 11)
-#define FDO_PWM_MODE_SHIFT 11
-#define TACH_PWM_RESP_RATE(x) ((x) << 25)
-#define TACH_PWM_RESP_RATE_MASK (0x7f << 25)
-#define TACH_PWM_RESP_RATE_SHIFT 25
-
-#define CG_TACH_CTRL 0x1dc
-# define EDGE_PER_REV(x) ((x) << 0)
-# define EDGE_PER_REV_MASK (0x7 << 0)
-# define EDGE_PER_REV_SHIFT 0
-# define TARGET_PERIOD(x) ((x) << 3)
-# define TARGET_PERIOD_MASK 0xfffffff8
-# define TARGET_PERIOD_SHIFT 3
-#define CG_TACH_STATUS 0x1dd
-# define TACH_PERIOD(x) ((x) << 0)
-# define TACH_PERIOD_MASK 0xffffffff
-# define TACH_PERIOD_SHIFT 0
-
-#define GENERAL_PWRMGT 0x1e0
-# define GLOBAL_PWRMGT_EN (1 << 0)
-# define STATIC_PM_EN (1 << 1)
-# define THERMAL_PROTECTION_DIS (1 << 2)
-# define THERMAL_PROTECTION_TYPE (1 << 3)
-# define SW_SMIO_INDEX(x) ((x) << 6)
-# define SW_SMIO_INDEX_MASK (1 << 6)
-# define SW_SMIO_INDEX_SHIFT 6
-# define VOLT_PWRMGT_EN (1 << 10)
-# define DYN_SPREAD_SPECTRUM_EN (1 << 23)
-#define CG_TPC 0x1e1
-#define SCLK_PWRMGT_CNTL 0x1e2
-# define SCLK_PWRMGT_OFF (1 << 0)
-# define SCLK_LOW_D1 (1 << 1)
-# define FIR_RESET (1 << 4)
-# define FIR_FORCE_TREND_SEL (1 << 5)
-# define FIR_TREND_MODE (1 << 6)
-# define DYN_GFX_CLK_OFF_EN (1 << 7)
-# define GFX_CLK_FORCE_ON (1 << 8)
-# define GFX_CLK_REQUEST_OFF (1 << 9)
-# define GFX_CLK_FORCE_OFF (1 << 10)
-# define GFX_CLK_OFF_ACPI_D1 (1 << 11)
-# define GFX_CLK_OFF_ACPI_D2 (1 << 12)
-# define GFX_CLK_OFF_ACPI_D3 (1 << 13)
-# define DYN_LIGHT_SLEEP_EN (1 << 14)
-
-#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6
-# define CURRENT_STATE_INDEX_MASK (0xf << 4)
-# define CURRENT_STATE_INDEX_SHIFT 4
-
-#define CG_FTV 0x1ef
-
-#define CG_FFCT_0 0x1f0
-# define UTC_0(x) ((x) << 0)
-# define UTC_0_MASK (0x3ff << 0)
-# define DTC_0(x) ((x) << 10)
-# define DTC_0_MASK (0x3ff << 10)
-
-#define CG_BSP 0x1ff
-# define BSP(x) ((x) << 0)
-# define BSP_MASK (0xffff << 0)
-# define BSU(x) ((x) << 16)
-# define BSU_MASK (0xf << 16)
-#define CG_AT 0x200
-# define CG_R(x) ((x) << 0)
-# define CG_R_MASK (0xffff << 0)
-# define CG_L(x) ((x) << 16)
-# define CG_L_MASK (0xffff << 16)
-
-#define CG_GIT 0x201
-# define CG_GICST(x) ((x) << 0)
-# define CG_GICST_MASK (0xffff << 0)
-# define CG_GIPOT(x) ((x) << 16)
-# define CG_GIPOT_MASK (0xffff << 16)
-
-#define CG_SSP 0x203
-# define SST(x) ((x) << 0)
-# define SST_MASK (0xffff << 0)
-# define SSTU(x) ((x) << 16)
-# define SSTU_MASK (0xf << 16)
-
-#define CG_DISPLAY_GAP_CNTL 0x20a
-# define DISP1_GAP(x) ((x) << 0)
-# define DISP1_GAP_MASK (3 << 0)
-# define DISP2_GAP(x) ((x) << 2)
-# define DISP2_GAP_MASK (3 << 2)
-# define VBI_TIMER_COUNT(x) ((x) << 4)
-# define VBI_TIMER_COUNT_MASK (0x3fff << 4)
-# define VBI_TIMER_UNIT(x) ((x) << 20)
-# define VBI_TIMER_UNIT_MASK (7 << 20)
-# define DISP1_GAP_MCHG(x) ((x) << 24)
-# define DISP1_GAP_MCHG_MASK (3 << 24)
-# define DISP2_GAP_MCHG(x) ((x) << 26)
-# define DISP2_GAP_MCHG_MASK (3 << 26)
-
-#define CG_ULV_CONTROL 0x21e
-#define CG_ULV_PARAMETER 0x21f
-
-#define SMC_SCRATCH0 0x221
-
-#define CG_CAC_CTRL 0x22e
-# define CAC_WINDOW(x) ((x) << 0)
-# define CAC_WINDOW_MASK 0x00ffffff
-
-#define DMIF_ADDR_CONFIG 0x2F5
-
-#define DMIF_ADDR_CALC 0x300
-
-#define PIPE0_DMIF_BUFFER_CONTROL 0x0328
-# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0)
-# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4)
-
-#define SRBM_STATUS 0x394
-#define GRBM_RQ_PENDING (1 << 5)
-#define VMC_BUSY (1 << 8)
-#define MCB_BUSY (1 << 9)
-#define MCB_NON_DISPLAY_BUSY (1 << 10)
-#define MCC_BUSY (1 << 11)
-#define MCD_BUSY (1 << 12)
-#define SEM_BUSY (1 << 14)
-#define IH_BUSY (1 << 17)
-
-#define SRBM_SOFT_RESET 0x398
-#define SOFT_RESET_BIF (1 << 1)
-#define SOFT_RESET_DC (1 << 5)
-#define SOFT_RESET_DMA1 (1 << 6)
-#define SOFT_RESET_GRBM (1 << 8)
-#define SOFT_RESET_HDP (1 << 9)
-#define SOFT_RESET_IH (1 << 10)
-#define SOFT_RESET_MC (1 << 11)
-#define SOFT_RESET_ROM (1 << 14)
-#define SOFT_RESET_SEM (1 << 15)
-#define SOFT_RESET_VMC (1 << 17)
-#define SOFT_RESET_DMA (1 << 20)
-#define SOFT_RESET_TST (1 << 21)
-#define SOFT_RESET_REGBB (1 << 22)
-#define SOFT_RESET_ORB (1 << 23)
-
-#define CC_SYS_RB_BACKEND_DISABLE 0x3A0
-#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1
-
-#define SRBM_READ_ERROR 0x3A6
-#define SRBM_INT_CNTL 0x3A8
-#define SRBM_INT_ACK 0x3AA
-
-#define SRBM_STATUS2 0x3B1
-#define DMA_BUSY (1 << 5)
-#define DMA1_BUSY (1 << 6)
-
-#define VM_L2_CNTL 0x500
-#define ENABLE_L2_CACHE (1 << 0)
-#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1)
-#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2)
-#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4)
-#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9)
-#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10)
-#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15)
-#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19)
-#define VM_L2_CNTL2 0x501
-#define INVALIDATE_ALL_L1_TLBS (1 << 0)
-#define INVALIDATE_L2_CACHE (1 << 1)
-#define INVALIDATE_CACHE_MODE(x) ((x) << 26)
-#define INVALIDATE_PTE_AND_PDE_CACHES 0
-#define INVALIDATE_ONLY_PTE_CACHES 1
-#define INVALIDATE_ONLY_PDE_CACHES 2
-#define VM_L2_CNTL3 0x502
-#define BANK_SELECT(x) ((x) << 0)
-#define L2_CACHE_UPDATE_MODE(x) ((x) << 6)
-#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15)
-#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20)
-#define VM_L2_STATUS 0x503
-#define L2_BUSY (1 << 0)
-#define VM_CONTEXT0_CNTL 0x504
-#define ENABLE_CONTEXT (1 << 0)
-#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
-#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
-#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
-#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
-#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
-#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
-#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
-#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
-#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
-#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
-#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
-#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
-#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
-#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24)
-#define VM_CONTEXT1_CNTL 0x505
-#define VM_CONTEXT0_CNTL2 0x50C
-#define VM_CONTEXT1_CNTL2 0x50D
-#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E
-#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F
-#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510
-#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511
-#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512
-#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513
-#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514
-#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515
-
-#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f
-#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537
-#define PROTECTIONS_MASK (0xf << 0)
-#define PROTECTIONS_SHIFT 0
- /* bit 0: range
- * bit 1: pde0
- * bit 2: valid
- * bit 3: read
- * bit 4: write
- */
-#define MEMORY_CLIENT_ID_MASK (0xff << 12)
-#define MEMORY_CLIENT_ID_SHIFT 12
-#define MEMORY_CLIENT_RW_MASK (1 << 24)
-#define MEMORY_CLIENT_RW_SHIFT 24
-#define FAULT_VMID_MASK (0xf << 25)
-#define FAULT_VMID_SHIFT 25
-
#define VM_INVALIDATE_REQUEST 0x51E
#define VM_INVALIDATE_RESPONSE 0x51F
-#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546
-#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547
-
-#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F
-#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x550
-#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551
-#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552
-#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553
-#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554
-#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555
-#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556
-#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557
-#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558
-
-#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F
-#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560
-
#define VM_L2_CG 0x570
#define MC_CG_ENABLE (1 << 18)
#define MC_LS_ENABLE (1 << 19)
-#define MC_SHARED_CHMAP 0x801
-#define NOOFCHAN_SHIFT 12
-#define NOOFCHAN_MASK 0x0000f000
-#define MC_SHARED_CHREMAP 0x802
-
#define MC_VM_FB_LOCATION 0x809
#define MC_VM_AGP_TOP 0x80A
#define MC_VM_AGP_BOT 0x80B
@@ -495,21 +112,6 @@
#define MC_CITF_MISC_WR_CG 0x993
#define MC_CITF_MISC_VM_CG 0x994
-#define MC_ARB_RAMCFG 0x9D8
-#define NOOFBANK_SHIFT 0
-#define NOOFBANK_MASK 0x00000003
-#define NOOFRANK_SHIFT 2
-#define NOOFRANK_MASK 0x00000004
-#define NOOFROWS_SHIFT 3
-#define NOOFROWS_MASK 0x00000038
-#define NOOFCOLS_SHIFT 6
-#define NOOFCOLS_MASK 0x000000C0
-#define CHANSIZE_SHIFT 8
-#define CHANSIZE_MASK 0x00000100
-#define CHANSIZE_OVERRIDE (1 << 11)
-#define NOOFGROUPS_SHIFT 12
-#define NOOFGROUPS_MASK 0x00001000
-
#define MC_ARB_DRAM_TIMING 0x9DD
#define MC_ARB_DRAM_TIMING2 0x9DE
@@ -635,20 +237,6 @@
#define CLKS(x) ((x) << 0)
#define CLKS_MASK (0xfff << 0)
-#define HDP_HOST_PATH_CNTL 0xB00
-#define CLOCK_GATING_DIS (1 << 23)
-#define HDP_NONSURFACE_BASE 0xB01
-#define HDP_NONSURFACE_INFO 0xB02
-#define HDP_NONSURFACE_SIZE 0xB03
-
-#define HDP_DEBUG0 0xBCC
-
-#define HDP_ADDR_CONFIG 0xBD2
-#define HDP_MISC_CNTL 0xBD3
-#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0)
-#define HDP_MEM_POWER_LS 0xBD4
-#define HDP_LS_ENABLE (1 << 0)
-
#define ATC_MISC_CG 0xCD4
#define IH_RB_CNTL 0xF80
@@ -678,8 +266,6 @@
# define MC_WR_CLEAN_CNT(x) ((x) << 20)
# define MC_VMID(x) ((x) << 25)
-#define CONFIG_MEMSIZE 0x150A
-
#define INTERRUPT_CNTL 0x151A
# define IH_DUMMY_RD_OVERRIDE (1 << 0)
# define IH_DUMMY_RD_EN (1 << 1)
@@ -687,734 +273,28 @@
# define GEN_IH_INT_EN (1 << 8)
#define INTERRUPT_CNTL2 0x151B
-#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520
-
-#define BIF_FB_EN 0x1524
-#define FB_READ_EN (1 << 0)
-#define FB_WRITE_EN (1 << 1)
-
-#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528
-
-/* DCE6 ELD audio interface */
-#define AZ_F0_CODEC_ENDPOINT_INDEX 0x1780
-# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0)
-# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8)
-#define AZ_F0_CODEC_ENDPOINT_DATA 0x1781
-
-#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25
-#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0)
-#define SPEAKER_ALLOCATION_MASK (0x7f << 0)
-#define SPEAKER_ALLOCATION_SHIFT 0
-#define HDMI_CONNECTION (1 << 16)
-#define DP_CONNECTION (1 << 17)
-
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */
-# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
-/* max channels minus one. 7 = 8 channels */
-# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
-# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
-# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
-/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
- * bit0 = 32 kHz
- * bit1 = 44.1 kHz
- * bit2 = 48 kHz
- * bit3 = 88.2 kHz
- * bit4 = 96 kHz
- * bit5 = 176.4 kHz
- * bit6 = 192 kHz
- */
-
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37
-# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0)
-# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8)
-/* VIDEO_LIPSYNC, AUDIO_LIPSYNC
- * 0 = invalid
- * x = legal delay value
- * 255 = sync not supported
- */
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38
-# define HBR_CAPABLE (1 << 0) /* enabled by default */
-
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a
-# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0)
-# define PRODUCT_ID(x) (((x) & 0xffff) << 16)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b
-# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c
-# define PORT_ID0(x) (((x) & 0xffffffff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d
-# define PORT_ID1(x) (((x) & 0xffffffff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e
-# define DESCRIPTION0(x) (((x) & 0xff) << 0)
-# define DESCRIPTION1(x) (((x) & 0xff) << 8)
-# define DESCRIPTION2(x) (((x) & 0xff) << 16)
-# define DESCRIPTION3(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f
-# define DESCRIPTION4(x) (((x) & 0xff) << 0)
-# define DESCRIPTION5(x) (((x) & 0xff) << 8)
-# define DESCRIPTION6(x) (((x) & 0xff) << 16)
-# define DESCRIPTION7(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40
-# define DESCRIPTION8(x) (((x) & 0xff) << 0)
-# define DESCRIPTION9(x) (((x) & 0xff) << 8)
-# define DESCRIPTION10(x) (((x) & 0xff) << 16)
-# define DESCRIPTION11(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41
-# define DESCRIPTION12(x) (((x) & 0xff) << 0)
-# define DESCRIPTION13(x) (((x) & 0xff) << 8)
-# define DESCRIPTION14(x) (((x) & 0xff) << 16)
-# define DESCRIPTION15(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42
-# define DESCRIPTION16(x) (((x) & 0xff) << 0)
-# define DESCRIPTION17(x) (((x) & 0xff) << 8)
-
-#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54
-# define AUDIO_ENABLED (1 << 31)
-
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56
-#define PORT_CONNECTIVITY_MASK (3 << 30)
-#define PORT_CONNECTIVITY_SHIFT 30
-
-#define DC_LB_MEMORY_SPLIT 0x1AC3
-#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
-
-#define PRIORITY_A_CNT 0x1AC6
-#define PRIORITY_MARK_MASK 0x7fff
-#define PRIORITY_OFF (1 << 16)
-#define PRIORITY_ALWAYS_ON (1 << 20)
-#define PRIORITY_B_CNT 0x1AC7
-
-#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32
-# define LATENCY_WATERMARK_MASK(x) ((x) << 16)
-#define DPG_PIPE_LATENCY_CONTROL 0x1B33
-# define LATENCY_LOW_WATERMARK(x) ((x) << 0)
-# define LATENCY_HIGH_WATERMARK(x) ((x) << 16)
-
-/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */
-#define VLINE_STATUS 0x1AEE
-# define VLINE_OCCURRED (1 << 0)
-# define VLINE_ACK (1 << 4)
-# define VLINE_STAT (1 << 12)
-# define VLINE_INTERRUPT (1 << 16)
-# define VLINE_INTERRUPT_TYPE (1 << 17)
-/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */
-#define VBLANK_STATUS 0x1AEF
-# define VBLANK_OCCURRED (1 << 0)
-# define VBLANK_ACK (1 << 4)
-# define VBLANK_STAT (1 << 12)
-# define VBLANK_INTERRUPT (1 << 16)
-# define VBLANK_INTERRUPT_TYPE (1 << 17)
-
-/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */
-#define INT_MASK 0x1AD0
-# define VBLANK_INT_MASK (1 << 0)
-# define VLINE_INT_MASK (1 << 4)
-
-#define DISP_INTERRUPT_STATUS 0x183D
-# define LB_D1_VLINE_INTERRUPT (1 << 2)
-# define LB_D1_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD1_INTERRUPT (1 << 17)
-# define DC_HPD1_RX_INTERRUPT (1 << 18)
-# define DACA_AUTODETECT_INTERRUPT (1 << 22)
-# define DACB_AUTODETECT_INTERRUPT (1 << 23)
-# define DC_I2C_SW_DONE_INTERRUPT (1 << 24)
-# define DC_I2C_HW_DONE_INTERRUPT (1 << 25)
-#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E
-# define LB_D2_VLINE_INTERRUPT (1 << 2)
-# define LB_D2_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD2_INTERRUPT (1 << 17)
-# define DC_HPD2_RX_INTERRUPT (1 << 18)
-# define DISP_TIMER_INTERRUPT (1 << 24)
-#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F
-# define LB_D3_VLINE_INTERRUPT (1 << 2)
-# define LB_D3_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD3_INTERRUPT (1 << 17)
-# define DC_HPD3_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840
-# define LB_D4_VLINE_INTERRUPT (1 << 2)
-# define LB_D4_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD4_INTERRUPT (1 << 17)
-# define DC_HPD4_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853
-# define LB_D5_VLINE_INTERRUPT (1 << 2)
-# define LB_D5_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD5_INTERRUPT (1 << 17)
-# define DC_HPD5_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854
-# define LB_D6_VLINE_INTERRUPT (1 << 2)
-# define LB_D6_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD6_INTERRUPT (1 << 17)
-# define DC_HPD6_RX_INTERRUPT (1 << 18)
-
-/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */
-#define GRPH_INT_STATUS 0x1A16
-# define GRPH_PFLIP_INT_OCCURRED (1 << 0)
-# define GRPH_PFLIP_INT_CLEAR (1 << 8)
-/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */
-#define GRPH_INT_CONTROL 0x1A17
-# define GRPH_PFLIP_INT_MASK (1 << 0)
-# define GRPH_PFLIP_INT_TYPE (1 << 8)
-
-#define DAC_AUTODETECT_INT_CONTROL 0x19F2
-
-#define DC_HPD1_INT_STATUS 0x1807
-#define DC_HPD2_INT_STATUS 0x180A
-#define DC_HPD3_INT_STATUS 0x180D
-#define DC_HPD4_INT_STATUS 0x1810
-#define DC_HPD5_INT_STATUS 0x1813
-#define DC_HPD6_INT_STATUS 0x1816
-# define DC_HPDx_INT_STATUS (1 << 0)
-# define DC_HPDx_SENSE (1 << 1)
-# define DC_HPDx_RX_INT_STATUS (1 << 8)
-
-#define DC_HPD1_INT_CONTROL 0x1808
-#define DC_HPD2_INT_CONTROL 0x180B
-#define DC_HPD3_INT_CONTROL 0x180E
-#define DC_HPD4_INT_CONTROL 0x1811
-#define DC_HPD5_INT_CONTROL 0x1814
-#define DC_HPD6_INT_CONTROL 0x1817
-# define DC_HPDx_INT_ACK (1 << 0)
-# define DC_HPDx_INT_POLARITY (1 << 8)
-# define DC_HPDx_INT_EN (1 << 16)
-# define DC_HPDx_RX_INT_ACK (1 << 20)
-# define DC_HPDx_RX_INT_EN (1 << 24)
-
-#define DC_HPD1_CONTROL 0x1809
-#define DC_HPD2_CONTROL 0x180C
-#define DC_HPD3_CONTROL 0x180F
-#define DC_HPD4_CONTROL 0x1812
-#define DC_HPD5_CONTROL 0x1815
-#define DC_HPD6_CONTROL 0x1818
-# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0)
-# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
-# define DC_HPDx_EN (1 << 28)
-
-#define DPG_PIPE_STUTTER_CONTROL 0x1B35
-# define STUTTER_ENABLE (1 << 0)
-
-/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */
-#define CRTC_STATUS_FRAME_COUNT 0x1BA6
-
-/* Audio clocks */
-#define DCCG_AUDIO_DTO_SOURCE 0x05ac
-# define DCCG_AUDIO_DTO0_SOURCE_SEL(x) ((x) << 0) /* crtc0 - crtc5 */
-# define DCCG_AUDIO_DTO_SEL (1 << 4) /* 0=dto0 1=dto1 */
-
-#define DCCG_AUDIO_DTO0_PHASE 0x05b0
-#define DCCG_AUDIO_DTO0_MODULE 0x05b4
-#define DCCG_AUDIO_DTO1_PHASE 0x05c0
-#define DCCG_AUDIO_DTO1_MODULE 0x05c4
-
-#define AFMT_AUDIO_SRC_CONTROL 0x1c4f
-#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0)
-/* AFMT_AUDIO_SRC_SELECT
- * 0 = stream0
- * 1 = stream1
- * 2 = stream2
- * 3 = stream3
- * 4 = stream4
- * 5 = stream5
- */
-
-#define GRBM_CNTL 0x2000
-#define GRBM_READ_TIMEOUT(x) ((x) << 0)
-
-#define GRBM_STATUS2 0x2002
-#define RLC_RQ_PENDING (1 << 0)
-#define RLC_BUSY (1 << 8)
-#define TC_BUSY (1 << 9)
-
-#define GRBM_STATUS 0x2004
-#define CMDFIFO_AVAIL_MASK 0x0000000F
-#define RING2_RQ_PENDING (1 << 4)
-#define SRBM_RQ_PENDING (1 << 5)
-#define RING1_RQ_PENDING (1 << 6)
-#define CF_RQ_PENDING (1 << 7)
-#define PF_RQ_PENDING (1 << 8)
-#define GDS_DMA_RQ_PENDING (1 << 9)
-#define GRBM_EE_BUSY (1 << 10)
-#define DB_CLEAN (1 << 12)
-#define CB_CLEAN (1 << 13)
-#define TA_BUSY (1 << 14)
-#define GDS_BUSY (1 << 15)
-#define VGT_BUSY (1 << 17)
-#define IA_BUSY_NO_DMA (1 << 18)
-#define IA_BUSY (1 << 19)
-#define SX_BUSY (1 << 20)
-#define SPI_BUSY (1 << 22)
-#define BCI_BUSY (1 << 23)
-#define SC_BUSY (1 << 24)
-#define PA_BUSY (1 << 25)
-#define DB_BUSY (1 << 26)
-#define CP_COHERENCY_BUSY (1 << 28)
-#define CP_BUSY (1 << 29)
-#define CB_BUSY (1 << 30)
-#define GUI_ACTIVE (1 << 31)
-#define GRBM_STATUS_SE0 0x2005
-#define GRBM_STATUS_SE1 0x2006
-#define SE_DB_CLEAN (1 << 1)
-#define SE_CB_CLEAN (1 << 2)
-#define SE_BCI_BUSY (1 << 22)
-#define SE_VGT_BUSY (1 << 23)
-#define SE_PA_BUSY (1 << 24)
-#define SE_TA_BUSY (1 << 25)
-#define SE_SX_BUSY (1 << 26)
-#define SE_SPI_BUSY (1 << 27)
-#define SE_SC_BUSY (1 << 29)
-#define SE_DB_BUSY (1 << 30)
-#define SE_CB_BUSY (1 << 31)
-
-#define GRBM_SOFT_RESET 0x2008
-#define SOFT_RESET_CP (1 << 0)
-#define SOFT_RESET_CB (1 << 1)
-#define SOFT_RESET_RLC (1 << 2)
-#define SOFT_RESET_DB (1 << 3)
-#define SOFT_RESET_GDS (1 << 4)
-#define SOFT_RESET_PA (1 << 5)
-#define SOFT_RESET_SC (1 << 6)
-#define SOFT_RESET_BCI (1 << 7)
-#define SOFT_RESET_SPI (1 << 8)
-#define SOFT_RESET_SX (1 << 10)
-#define SOFT_RESET_TC (1 << 11)
-#define SOFT_RESET_TA (1 << 12)
-#define SOFT_RESET_VGT (1 << 14)
-#define SOFT_RESET_IA (1 << 15)
-
-#define GRBM_GFX_INDEX 0x200B
-#define INSTANCE_INDEX(x) ((x) << 0)
-#define SH_INDEX(x) ((x) << 8)
-#define SE_INDEX(x) ((x) << 16)
-#define SH_BROADCAST_WRITES (1 << 29)
-#define INSTANCE_BROADCAST_WRITES (1 << 30)
-#define SE_BROADCAST_WRITES (1 << 31)
-
-#define GRBM_INT_CNTL 0x2018
-# define RDERR_INT_ENABLE (1 << 0)
-# define GUI_IDLE_INT_ENABLE (1 << 19)
-
-#define CP_STRMOUT_CNTL 0x213F
-#define SCRATCH_REG0 0x2140
-#define SCRATCH_REG1 0x2141
-#define SCRATCH_REG2 0x2142
-#define SCRATCH_REG3 0x2143
-#define SCRATCH_REG4 0x2144
-#define SCRATCH_REG5 0x2145
-#define SCRATCH_REG6 0x2146
-#define SCRATCH_REG7 0x2147
-
-#define SCRATCH_UMSK 0x2150
-#define SCRATCH_ADDR 0x2151
-
-#define CP_SEM_WAIT_TIMER 0x216F
-
-#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172
-
-#define CP_ME_CNTL 0x21B6
-#define CP_CE_HALT (1 << 24)
-#define CP_PFP_HALT (1 << 26)
-#define CP_ME_HALT (1 << 28)
-
-#define CP_COHER_CNTL2 0x217A
-
-#define CP_RB2_RPTR 0x21BE
-#define CP_RB1_RPTR 0x21BF
-#define CP_RB0_RPTR 0x21C0
-#define CP_RB_WPTR_DELAY 0x21C1
-
-#define CP_QUEUE_THRESHOLDS 0x21D8
-#define ROQ_IB1_START(x) ((x) << 0)
-#define ROQ_IB2_START(x) ((x) << 8)
-#define CP_MEQ_THRESHOLDS 0x21D9
-#define MEQ1_START(x) ((x) << 0)
-#define MEQ2_START(x) ((x) << 8)
-
-#define CP_PERFMON_CNTL 0x21FF
-
#define VGT_VTX_VECT_EJECT_REG 0x222C
-
-#define VGT_CACHE_INVALIDATION 0x2231
-#define CACHE_INVALIDATION(x) ((x) << 0)
-#define VC_ONLY 0
-#define TC_ONLY 1
-#define VC_AND_TC 2
-#define AUTO_INVLD_EN(x) ((x) << 6)
-#define NO_AUTO 0
-#define ES_AUTO 1
-#define GS_AUTO 2
-#define ES_AND_GS_AUTO 3
#define VGT_ESGS_RING_SIZE 0x2232
#define VGT_GSVS_RING_SIZE 0x2233
-
#define VGT_GS_VERTEX_REUSE 0x2235
-
#define VGT_PRIMITIVE_TYPE 0x2256
#define VGT_INDEX_TYPE 0x2257
-
#define VGT_NUM_INDICES 0x225C
#define VGT_NUM_INSTANCES 0x225D
-
#define VGT_TF_RING_SIZE 0x2262
-
#define VGT_HS_OFFCHIP_PARAM 0x226C
-
#define VGT_TF_MEMORY_BASE 0x226E
-#define CC_GC_SHADER_ARRAY_CONFIG 0x226F
-#define INACTIVE_CUS_MASK 0xFFFF0000
-#define INACTIVE_CUS_SHIFT 16
-#define GC_USER_SHADER_ARRAY_CONFIG 0x2270
-
-#define PA_CL_ENHANCE 0x2285
-#define CLIP_VTX_REORDER_ENA (1 << 0)
-#define NUM_CLIP_SEQ(x) ((x) << 1)
-
-#define PA_SU_LINE_STIPPLE_VALUE 0x2298
-
-#define PA_SC_LINE_STIPPLE_STATE 0x22C4
-
-#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9
-#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0)
-#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16)
-
-#define PA_SC_FIFO_SIZE 0x22F3
-#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0)
-#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6)
-#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15)
-#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23)
-
#define PA_SC_ENHANCE 0x22FC
-#define SQ_CONFIG 0x2300
-
-#define SQC_CACHES 0x2302
-
-#define SQ_POWER_THROTTLE 0x2396
-#define MIN_POWER(x) ((x) << 0)
-#define MIN_POWER_MASK (0x3fff << 0)
-#define MIN_POWER_SHIFT 0
-#define MAX_POWER(x) ((x) << 16)
-#define MAX_POWER_MASK (0x3fff << 16)
-#define MAX_POWER_SHIFT 0
-#define SQ_POWER_THROTTLE2 0x2397
-#define MAX_POWER_DELTA(x) ((x) << 0)
-#define MAX_POWER_DELTA_MASK (0x3fff << 0)
-#define MAX_POWER_DELTA_SHIFT 0
-#define STI_SIZE(x) ((x) << 16)
-#define STI_SIZE_MASK (0x3ff << 16)
-#define STI_SIZE_SHIFT 16
-#define LTI_RATIO(x) ((x) << 27)
-#define LTI_RATIO_MASK (0xf << 27)
-#define LTI_RATIO_SHIFT 27
-
-#define SX_DEBUG_1 0x2418
-
-#define SPI_STATIC_THREAD_MGMT_1 0x2438
-#define SPI_STATIC_THREAD_MGMT_2 0x2439
-#define SPI_STATIC_THREAD_MGMT_3 0x243A
-#define SPI_PS_MAX_WAVE_ID 0x243B
-
-#define SPI_CONFIG_CNTL 0x2440
-
-#define SPI_CONFIG_CNTL_1 0x244F
-#define VTX_DONE_DELAY(x) ((x) << 0)
-#define INTERP_ONE_PRIM_PER_ROW (1 << 4)
-
-#define CGTS_TCC_DISABLE 0x2452
-#define CGTS_USER_TCC_DISABLE 0x2453
-#define TCC_DISABLE_MASK 0xFFFF0000
-#define TCC_DISABLE_SHIFT 16
-#define CGTS_SM_CTRL_REG 0x2454
-#define OVERRIDE (1 << 21)
-#define LS_OVERRIDE (1 << 22)
-
-#define SPI_LB_CU_MASK 0x24D5
-
#define TA_CNTL_AUX 0x2542
-#define CC_RB_BACKEND_DISABLE 0x263D
-#define BACKEND_DISABLE(x) ((x) << 16)
-#define GB_ADDR_CONFIG 0x263E
-#define NUM_PIPES(x) ((x) << 0)
-#define NUM_PIPES_MASK 0x00000007
-#define NUM_PIPES_SHIFT 0
-#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4)
-#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070
-#define PIPE_INTERLEAVE_SIZE_SHIFT 4
-#define NUM_SHADER_ENGINES(x) ((x) << 12)
-#define NUM_SHADER_ENGINES_MASK 0x00003000
-#define NUM_SHADER_ENGINES_SHIFT 12
-#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16)
-#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000
-#define SHADER_ENGINE_TILE_SIZE_SHIFT 16
-#define NUM_GPUS(x) ((x) << 20)
-#define NUM_GPUS_MASK 0x00700000
-#define NUM_GPUS_SHIFT 20
-#define MULTI_GPU_TILE_SIZE(x) ((x) << 24)
-#define MULTI_GPU_TILE_SIZE_MASK 0x03000000
-#define MULTI_GPU_TILE_SIZE_SHIFT 24
-#define ROW_SIZE(x) ((x) << 28)
-#define ROW_SIZE_MASK 0x30000000
-#define ROW_SIZE_SHIFT 28
-
-#define GB_TILE_MODE0 0x2644
-# define MICRO_TILE_MODE(x) ((x) << 0)
-# define ADDR_SURF_DISPLAY_MICRO_TILING 0
-# define ADDR_SURF_THIN_MICRO_TILING 1
-# define ADDR_SURF_DEPTH_MICRO_TILING 2
-# define ARRAY_MODE(x) ((x) << 2)
-# define ARRAY_LINEAR_GENERAL 0
-# define ARRAY_LINEAR_ALIGNED 1
-# define ARRAY_1D_TILED_THIN1 2
-# define ARRAY_2D_TILED_THIN1 4
-# define PIPE_CONFIG(x) ((x) << 6)
-# define ADDR_SURF_P2 0
-# define ADDR_SURF_P4_8x16 4
-# define ADDR_SURF_P4_16x16 5
-# define ADDR_SURF_P4_16x32 6
-# define ADDR_SURF_P4_32x32 7
-# define ADDR_SURF_P8_16x16_8x16 8
-# define ADDR_SURF_P8_16x32_8x16 9
-# define ADDR_SURF_P8_32x32_8x16 10
-# define ADDR_SURF_P8_16x32_16x16 11
-# define ADDR_SURF_P8_32x32_16x16 12
-# define ADDR_SURF_P8_32x32_16x32 13
-# define ADDR_SURF_P8_32x64_32x32 14
-# define TILE_SPLIT(x) ((x) << 11)
-# define ADDR_SURF_TILE_SPLIT_64B 0
-# define ADDR_SURF_TILE_SPLIT_128B 1
-# define ADDR_SURF_TILE_SPLIT_256B 2
-# define ADDR_SURF_TILE_SPLIT_512B 3
-# define ADDR_SURF_TILE_SPLIT_1KB 4
-# define ADDR_SURF_TILE_SPLIT_2KB 5
-# define ADDR_SURF_TILE_SPLIT_4KB 6
-# define BANK_WIDTH(x) ((x) << 14)
-# define ADDR_SURF_BANK_WIDTH_1 0
-# define ADDR_SURF_BANK_WIDTH_2 1
-# define ADDR_SURF_BANK_WIDTH_4 2
-# define ADDR_SURF_BANK_WIDTH_8 3
-# define BANK_HEIGHT(x) ((x) << 16)
-# define ADDR_SURF_BANK_HEIGHT_1 0
-# define ADDR_SURF_BANK_HEIGHT_2 1
-# define ADDR_SURF_BANK_HEIGHT_4 2
-# define ADDR_SURF_BANK_HEIGHT_8 3
-# define MACRO_TILE_ASPECT(x) ((x) << 18)
-# define ADDR_SURF_MACRO_ASPECT_1 0
-# define ADDR_SURF_MACRO_ASPECT_2 1
-# define ADDR_SURF_MACRO_ASPECT_4 2
-# define ADDR_SURF_MACRO_ASPECT_8 3
-# define NUM_BANKS(x) ((x) << 20)
-# define ADDR_SURF_2_BANK 0
-# define ADDR_SURF_4_BANK 1
-# define ADDR_SURF_8_BANK 2
-# define ADDR_SURF_16_BANK 3
-#define GB_TILE_MODE1 0x2645
-#define GB_TILE_MODE2 0x2646
-#define GB_TILE_MODE3 0x2647
-#define GB_TILE_MODE4 0x2648
-#define GB_TILE_MODE5 0x2649
-#define GB_TILE_MODE6 0x264a
-#define GB_TILE_MODE7 0x264b
-#define GB_TILE_MODE8 0x264c
-#define GB_TILE_MODE9 0x264d
-#define GB_TILE_MODE10 0x264e
-#define GB_TILE_MODE11 0x264f
-#define GB_TILE_MODE12 0x2650
-#define GB_TILE_MODE13 0x2651
-#define GB_TILE_MODE14 0x2652
-#define GB_TILE_MODE15 0x2653
-#define GB_TILE_MODE16 0x2654
-#define GB_TILE_MODE17 0x2655
-#define GB_TILE_MODE18 0x2656
-#define GB_TILE_MODE19 0x2657
-#define GB_TILE_MODE20 0x2658
-#define GB_TILE_MODE21 0x2659
-#define GB_TILE_MODE22 0x265a
-#define GB_TILE_MODE23 0x265b
-#define GB_TILE_MODE24 0x265c
-#define GB_TILE_MODE25 0x265d
-#define GB_TILE_MODE26 0x265e
-#define GB_TILE_MODE27 0x265f
-#define GB_TILE_MODE28 0x2660
-#define GB_TILE_MODE29 0x2661
-#define GB_TILE_MODE30 0x2662
-#define GB_TILE_MODE31 0x2663
-
-#define CB_PERFCOUNTER0_SELECT0 0x2688
-#define CB_PERFCOUNTER0_SELECT1 0x2689
-#define CB_PERFCOUNTER1_SELECT0 0x268A
-#define CB_PERFCOUNTER1_SELECT1 0x268B
-#define CB_PERFCOUNTER2_SELECT0 0x268C
-#define CB_PERFCOUNTER2_SELECT1 0x268D
-#define CB_PERFCOUNTER3_SELECT0 0x268E
-#define CB_PERFCOUNTER3_SELECT1 0x268F
-
-#define CB_CGTT_SCLK_CTRL 0x2698
-
-#define GC_USER_RB_BACKEND_DISABLE 0x26DF
-#define BACKEND_DISABLE_MASK 0x00FF0000
-#define BACKEND_DISABLE_SHIFT 16
-
-#define TCP_CHAN_STEER_LO 0x2B03
-#define TCP_CHAN_STEER_HI 0x2B94
-
-#define CP_RB0_BASE 0x3040
-#define CP_RB0_CNTL 0x3041
-#define RB_BUFSZ(x) ((x) << 0)
-#define RB_BLKSZ(x) ((x) << 8)
-#define BUF_SWAP_32BIT (2 << 16)
-#define RB_NO_UPDATE (1 << 27)
-#define RB_RPTR_WR_ENA (1 << 31)
-
-#define CP_RB0_RPTR_ADDR 0x3043
-#define CP_RB0_RPTR_ADDR_HI 0x3044
-#define CP_RB0_WPTR 0x3045
-
-#define CP_PFP_UCODE_ADDR 0x3054
-#define CP_PFP_UCODE_DATA 0x3055
-#define CP_ME_RAM_RADDR 0x3056
-#define CP_ME_RAM_WADDR 0x3057
-#define CP_ME_RAM_DATA 0x3058
-
-#define CP_CE_UCODE_ADDR 0x305A
-#define CP_CE_UCODE_DATA 0x305B
-
-#define CP_RB1_BASE 0x3060
-#define CP_RB1_CNTL 0x3061
-#define CP_RB1_RPTR_ADDR 0x3062
-#define CP_RB1_RPTR_ADDR_HI 0x3063
-#define CP_RB1_WPTR 0x3064
-#define CP_RB2_BASE 0x3065
-#define CP_RB2_CNTL 0x3066
-#define CP_RB2_RPTR_ADDR 0x3067
-#define CP_RB2_RPTR_ADDR_HI 0x3068
-#define CP_RB2_WPTR 0x3069
-#define CP_INT_CNTL_RING0 0x306A
-#define CP_INT_CNTL_RING1 0x306B
-#define CP_INT_CNTL_RING2 0x306C
-# define CNTX_BUSY_INT_ENABLE (1 << 19)
-# define CNTX_EMPTY_INT_ENABLE (1 << 20)
-# define WAIT_MEM_SEM_INT_ENABLE (1 << 21)
-# define TIME_STAMP_INT_ENABLE (1 << 26)
-# define CP_RINGID2_INT_ENABLE (1 << 29)
-# define CP_RINGID1_INT_ENABLE (1 << 30)
-# define CP_RINGID0_INT_ENABLE (1 << 31)
-#define CP_INT_STATUS_RING0 0x306D
-#define CP_INT_STATUS_RING1 0x306E
-#define CP_INT_STATUS_RING2 0x306F
-# define WAIT_MEM_SEM_INT_STAT (1 << 21)
-# define TIME_STAMP_INT_STAT (1 << 26)
-# define CP_RINGID2_INT_STAT (1 << 29)
-# define CP_RINGID1_INT_STAT (1 << 30)
-# define CP_RINGID0_INT_STAT (1 << 31)
-
-#define CP_MEM_SLP_CNTL 0x3079
-# define CP_MEM_LS_EN (1 << 0)
-
-#define CP_DEBUG 0x307F
-
-#define RLC_CNTL 0x30C0
-# define RLC_ENABLE (1 << 0)
-#define RLC_RL_BASE 0x30C1
-#define RLC_RL_SIZE 0x30C2
-#define RLC_LB_CNTL 0x30C3
-# define LOAD_BALANCE_ENABLE (1 << 0)
-#define RLC_SAVE_AND_RESTORE_BASE 0x30C4
-#define RLC_LB_CNTR_MAX 0x30C5
-#define RLC_LB_CNTR_INIT 0x30C6
-
-#define RLC_CLEAR_STATE_RESTORE_BASE 0x30C8
-
-#define RLC_UCODE_ADDR 0x30CB
-#define RLC_UCODE_DATA 0x30CC
-
-#define RLC_GPU_CLOCK_COUNT_LSB 0x30CE
-#define RLC_GPU_CLOCK_COUNT_MSB 0x30CF
-#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x30D0
-#define RLC_MC_CNTL 0x30D1
-#define RLC_UCODE_CNTL 0x30D2
-#define RLC_STAT 0x30D3
-# define RLC_BUSY_STATUS (1 << 0)
-# define GFX_POWER_STATUS (1 << 1)
-# define GFX_CLOCK_STATUS (1 << 2)
-# define GFX_LS_STATUS (1 << 3)
-
-#define RLC_PG_CNTL 0x30D7
-# define GFX_PG_ENABLE (1 << 0)
-# define GFX_PG_SRC (1 << 1)
-
-#define RLC_CGTT_MGCG_OVERRIDE 0x3100
-#define RLC_CGCG_CGLS_CTRL 0x3101
-# define CGCG_EN (1 << 0)
-# define CGLS_EN (1 << 1)
-
-#define RLC_TTOP_D 0x3105
-# define RLC_PUD(x) ((x) << 0)
-# define RLC_PUD_MASK (0xff << 0)
-# define RLC_PDD(x) ((x) << 8)
-# define RLC_PDD_MASK (0xff << 8)
-# define RLC_TTPD(x) ((x) << 16)
-# define RLC_TTPD_MASK (0xff << 16)
-# define RLC_MSD(x) ((x) << 24)
-# define RLC_MSD_MASK (0xff << 24)
-
-#define RLC_LB_INIT_CU_MASK 0x3107
-
-#define RLC_PG_AO_CU_MASK 0x310B
-#define RLC_MAX_PG_CU 0x310C
-# define MAX_PU_CU(x) ((x) << 0)
-# define MAX_PU_CU_MASK (0xff << 0)
-#define RLC_AUTO_PG_CTRL 0x310C
-# define AUTO_PG_EN (1 << 0)
-# define GRBM_REG_SGIT(x) ((x) << 3)
-# define GRBM_REG_SGIT_MASK (0xffff << 3)
-# define PG_AFTER_GRBM_REG_ST(x) ((x) << 19)
-# define PG_AFTER_GRBM_REG_ST_MASK (0x1fff << 19)
-
-#define RLC_SERDES_WR_MASTER_MASK_0 0x3115
-#define RLC_SERDES_WR_MASTER_MASK_1 0x3116
-#define RLC_SERDES_WR_CTRL 0x3117
-
-#define RLC_SERDES_MASTER_BUSY_0 0x3119
-#define RLC_SERDES_MASTER_BUSY_1 0x311A
-
-#define RLC_GCPM_GENERAL_3 0x311E
-
-#define DB_RENDER_CONTROL 0xA000
-
-#define DB_DEPTH_INFO 0xA00F
-
-#define PA_SC_RASTER_CONFIG 0xA0D4
-# define RB_MAP_PKR0(x) ((x) << 0)
-# define RB_MAP_PKR0_MASK (0x3 << 0)
-# define RB_MAP_PKR1(x) ((x) << 2)
-# define RB_MAP_PKR1_MASK (0x3 << 2)
-# define RASTER_CONFIG_RB_MAP_0 0
-# define RASTER_CONFIG_RB_MAP_1 1
-# define RASTER_CONFIG_RB_MAP_2 2
-# define RASTER_CONFIG_RB_MAP_3 3
+// #define PA_SC_RASTER_CONFIG 0xA0D4
# define RB_XSEL2(x) ((x) << 4)
# define RB_XSEL2_MASK (0x3 << 4)
# define RB_XSEL (1 << 6)
# define RB_YSEL (1 << 7)
# define PKR_MAP(x) ((x) << 8)
-# define PKR_MAP_MASK (0x3 << 8)
-# define RASTER_CONFIG_PKR_MAP_0 0
-# define RASTER_CONFIG_PKR_MAP_1 1
-# define RASTER_CONFIG_PKR_MAP_2 2
-# define RASTER_CONFIG_PKR_MAP_3 3
# define PKR_XSEL(x) ((x) << 10)
# define PKR_XSEL_MASK (0x3 << 10)
# define PKR_YSEL(x) ((x) << 12)
@@ -1426,221 +306,19 @@
# define SC_YSEL(x) ((x) << 20)
# define SC_YSEL_MASK (0x3 << 20)
# define SE_MAP(x) ((x) << 24)
-# define SE_MAP_MASK (0x3 << 24)
-# define RASTER_CONFIG_SE_MAP_0 0
-# define RASTER_CONFIG_SE_MAP_1 1
-# define RASTER_CONFIG_SE_MAP_2 2
-# define RASTER_CONFIG_SE_MAP_3 3
# define SE_XSEL(x) ((x) << 26)
# define SE_XSEL_MASK (0x3 << 26)
# define SE_YSEL(x) ((x) << 28)
# define SE_YSEL_MASK (0x3 << 28)
-
-#define VGT_EVENT_INITIATOR 0xA2A4
-# define SAMPLE_STREAMOUTSTATS1 (1 << 0)
-# define SAMPLE_STREAMOUTSTATS2 (2 << 0)
-# define SAMPLE_STREAMOUTSTATS3 (3 << 0)
-# define CACHE_FLUSH_TS (4 << 0)
-# define CACHE_FLUSH (6 << 0)
-# define CS_PARTIAL_FLUSH (7 << 0)
-# define VGT_STREAMOUT_RESET (10 << 0)
-# define END_OF_PIPE_INCR_DE (11 << 0)
-# define END_OF_PIPE_IB_END (12 << 0)
-# define RST_PIX_CNT (13 << 0)
-# define VS_PARTIAL_FLUSH (15 << 0)
-# define PS_PARTIAL_FLUSH (16 << 0)
-# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0)
-# define ZPASS_DONE (21 << 0)
-# define CACHE_FLUSH_AND_INV_EVENT (22 << 0)
-# define PERFCOUNTER_START (23 << 0)
-# define PERFCOUNTER_STOP (24 << 0)
-# define PIPELINESTAT_START (25 << 0)
-# define PIPELINESTAT_STOP (26 << 0)
-# define PERFCOUNTER_SAMPLE (27 << 0)
-# define SAMPLE_PIPELINESTAT (30 << 0)
-# define SAMPLE_STREAMOUTSTATS (32 << 0)
-# define RESET_VTX_CNT (33 << 0)
-# define VGT_FLUSH (36 << 0)
-# define BOTTOM_OF_PIPE_TS (40 << 0)
-# define DB_CACHE_FLUSH_AND_INV (42 << 0)
-# define FLUSH_AND_INV_DB_DATA_TS (43 << 0)
-# define FLUSH_AND_INV_DB_META (44 << 0)
-# define FLUSH_AND_INV_CB_DATA_TS (45 << 0)
-# define FLUSH_AND_INV_CB_META (46 << 0)
-# define CS_DONE (47 << 0)
-# define PS_DONE (48 << 0)
-# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0)
-# define THREAD_TRACE_START (51 << 0)
-# define THREAD_TRACE_STOP (52 << 0)
-# define THREAD_TRACE_FLUSH (54 << 0)
-# define THREAD_TRACE_FINISH (55 << 0)
-
-/* PIF PHY0 registers idx/data 0x8/0xc */
-#define PB0_PIF_CNTL 0x10
-# define LS2_EXIT_TIME(x) ((x) << 17)
-# define LS2_EXIT_TIME_MASK (0x7 << 17)
-# define LS2_EXIT_TIME_SHIFT 17
-#define PB0_PIF_PAIRING 0x11
-# define MULTI_PIF (1 << 25)
-#define PB0_PIF_PWRDOWN_0 0x12
-# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10
-# define PLL_RAMP_UP_TIME_0(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_0_SHIFT 24
-#define PB0_PIF_PWRDOWN_1 0x13
-# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10
-# define PLL_RAMP_UP_TIME_1(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_1_SHIFT 24
-
-#define PB0_PIF_PWRDOWN_2 0x17
-# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10
-# define PLL_RAMP_UP_TIME_2(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_2_SHIFT 24
-#define PB0_PIF_PWRDOWN_3 0x18
-# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10
-# define PLL_RAMP_UP_TIME_3(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_3_SHIFT 24
-/* PIF PHY1 registers idx/data 0x10/0x14 */
-#define PB1_PIF_CNTL 0x10
-#define PB1_PIF_PAIRING 0x11
-#define PB1_PIF_PWRDOWN_0 0x12
-#define PB1_PIF_PWRDOWN_1 0x13
-
-#define PB1_PIF_PWRDOWN_2 0x17
-#define PB1_PIF_PWRDOWN_3 0x18
-/* PCIE registers idx/data 0x30/0x34 */
-#define PCIE_CNTL2 0x1c /* PCIE */
-# define SLV_MEM_LS_EN (1 << 16)
-# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17)
-# define MST_MEM_LS_EN (1 << 18)
-# define REPLAY_MEM_LS_EN (1 << 19)
-#define PCIE_LC_STATUS1 0x28 /* PCIE */
-# define LC_REVERSE_RCVR (1 << 0)
-# define LC_REVERSE_XMIT (1 << 1)
-# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2)
-# define LC_OPERATING_LINK_WIDTH_SHIFT 2
-# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5)
-# define LC_DETECTED_LINK_WIDTH_SHIFT 5
-
-#define PCIE_P_CNTL 0x40 /* PCIE */
-# define P_IGNORE_EDB_ERR (1 << 6)
-
/* PCIE PORT registers idx/data 0x38/0x3c */
-#define PCIE_LC_CNTL 0xa0
-# define LC_L0S_INACTIVITY(x) ((x) << 8)
-# define LC_L0S_INACTIVITY_MASK (0xf << 8)
-# define LC_L0S_INACTIVITY_SHIFT 8
-# define LC_L1_INACTIVITY(x) ((x) << 12)
-# define LC_L1_INACTIVITY_MASK (0xf << 12)
-# define LC_L1_INACTIVITY_SHIFT 12
-# define LC_PMI_TO_L1_DIS (1 << 16)
-# define LC_ASPM_TO_L1_DIS (1 << 24)
-#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
-# define LC_LINK_WIDTH_SHIFT 0
-# define LC_LINK_WIDTH_MASK 0x7
+// #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
# define LC_LINK_WIDTH_X0 0
# define LC_LINK_WIDTH_X1 1
# define LC_LINK_WIDTH_X2 2
# define LC_LINK_WIDTH_X4 3
# define LC_LINK_WIDTH_X8 4
# define LC_LINK_WIDTH_X16 6
-# define LC_LINK_WIDTH_RD_SHIFT 4
-# define LC_LINK_WIDTH_RD_MASK 0x70
-# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7)
-# define LC_RECONFIG_NOW (1 << 8)
-# define LC_RENEGOTIATION_SUPPORT (1 << 9)
-# define LC_RENEGOTIATE_EN (1 << 10)
-# define LC_SHORT_RECONFIG_EN (1 << 11)
-# define LC_UPCONFIGURE_SUPPORT (1 << 12)
-# define LC_UPCONFIGURE_DIS (1 << 13)
-# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21)
-# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21)
-# define LC_DYN_LANES_PWR_STATE_SHIFT 21
-#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */
-# define LC_XMIT_N_FTS(x) ((x) << 0)
-# define LC_XMIT_N_FTS_MASK (0xff << 0)
-# define LC_XMIT_N_FTS_SHIFT 0
-# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8)
-# define LC_N_FTS_MASK (0xff << 24)
-#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */
-# define LC_GEN2_EN_STRAP (1 << 0)
-# define LC_GEN3_EN_STRAP (1 << 1)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3
-# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5)
-# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6)
-# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7)
-# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8)
-# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9)
-# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10)
-# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10
-# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */
-# define LC_CURRENT_DATA_RATE_SHIFT 13
-# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16)
-# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18)
-# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19)
-# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20)
-# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21)
-
-#define PCIE_LC_CNTL2 0xb1
-# define LC_ALLOW_PDWN_IN_L1 (1 << 17)
-# define LC_ALLOW_PDWN_IN_L23 (1 << 18)
-
-#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */
-# define LC_GO_TO_RECOVERY (1 << 30)
-#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */
-# define LC_REDO_EQ (1 << 5)
-# define LC_SET_QUIESCE (1 << 13)
-
-/*
- * UVD
- */
-#define UVD_UDEC_ADDR_CONFIG 0x3bd3
-#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4
-#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5
-#define UVD_RBC_RB_RPTR 0x3da4
-#define UVD_RBC_RB_WPTR 0x3da5
-#define UVD_STATUS 0x3daf
-
-#define UVD_CGC_CTRL 0x3dc2
-# define DCM (1 << 0)
-# define CG_DT(x) ((x) << 2)
-# define CG_DT_MASK (0xf << 2)
-# define CLK_OD(x) ((x) << 6)
-# define CLK_OD_MASK (0x1f << 6)
-
- /* UVD CTX indirect */
-#define UVD_CGC_MEM_CTRL 0xC0
-#define UVD_CGC_CTRL2 0xC1
-# define DYN_OR_EN (1 << 0)
-# define DYN_RR_EN (1 << 1)
-# define G_DIV_ID(x) ((x) << 2)
-# define G_DIV_ID_MASK (0x7 << 2)
/*
* PM4
@@ -1874,45 +552,7 @@
/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
#define DMA1_REGISTER_OFFSET 0x200 /* not a register */
-
-#define DMA_RB_CNTL 0x3400
-# define DMA_RB_ENABLE (1 << 0)
-# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
-# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
-# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
-# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
-# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
-#define DMA_RB_BASE 0x3401
-#define DMA_RB_RPTR 0x3402
-#define DMA_RB_WPTR 0x3403
-
-#define DMA_RB_RPTR_ADDR_HI 0x3407
-#define DMA_RB_RPTR_ADDR_LO 0x3408
-
-#define DMA_IB_CNTL 0x3409
-# define DMA_IB_ENABLE (1 << 0)
-# define DMA_IB_SWAP_ENABLE (1 << 4)
-# define CMD_VMID_FORCE (1 << 31)
-#define DMA_IB_RPTR 0x340a
-#define DMA_CNTL 0x340b
-# define TRAP_ENABLE (1 << 0)
-# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
-# define SEM_WAIT_INT_ENABLE (1 << 2)
-# define DATA_SWAP_ENABLE (1 << 3)
-# define FENCE_SWAP_ENABLE (1 << 4)
-# define CTXEMPTY_INT_ENABLE (1 << 28)
-#define DMA_STATUS_REG 0x340d
-# define DMA_IDLE (1 << 0)
-#define DMA_TILING_CONFIG 0x342e
-
-#define DMA_POWER_CNTL 0x342f
-# define MEM_POWER_OVERRIDE (1 << 8)
-#define DMA_CLK_CTRL 0x3430
-
-#define DMA_PG 0x3435
-# define PG_CNTL_ENABLE (1 << 0)
-#define DMA_PGFSM_CONFIG 0x3436
-#define DMA_PGFSM_WRITE 0x3437
+#define SDMA_MAX_INSTANCE 2
#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
(((b) & 0x1) << 26) | \
@@ -1941,45 +581,7 @@
#define DMA_PACKET_POLL_REG_MEM 0xe
#define DMA_PACKET_NOP 0xf
-#define VCE_STATUS 0x20004
-#define VCE_VCPU_CNTL 0x20014
-#define VCE_CLK_EN (1 << 0)
-#define VCE_VCPU_CACHE_OFFSET0 0x20024
-#define VCE_VCPU_CACHE_SIZE0 0x20028
-#define VCE_VCPU_CACHE_OFFSET1 0x2002c
-#define VCE_VCPU_CACHE_SIZE1 0x20030
-#define VCE_VCPU_CACHE_OFFSET2 0x20034
-#define VCE_VCPU_CACHE_SIZE2 0x20038
-#define VCE_SOFT_RESET 0x20120
-#define VCE_ECPU_SOFT_RESET (1 << 0)
-#define VCE_FME_SOFT_RESET (1 << 2)
-#define VCE_RB_BASE_LO2 0x2016c
-#define VCE_RB_BASE_HI2 0x20170
-#define VCE_RB_SIZE2 0x20174
-#define VCE_RB_RPTR2 0x20178
-#define VCE_RB_WPTR2 0x2017c
-#define VCE_RB_BASE_LO 0x20180
-#define VCE_RB_BASE_HI 0x20184
-#define VCE_RB_SIZE 0x20188
-#define VCE_RB_RPTR 0x2018c
-#define VCE_RB_WPTR 0x20190
-#define VCE_CLOCK_GATING_A 0x202f8
-#define VCE_CLOCK_GATING_B 0x202fc
-#define VCE_UENC_CLOCK_GATING 0x205bc
-#define VCE_UENC_REG_CLOCK_GATING 0x205c0
-#define VCE_FW_REG_STATUS 0x20e10
-# define VCE_FW_REG_STATUS_BUSY (1 << 0)
-# define VCE_FW_REG_STATUS_PASS (1 << 3)
-# define VCE_FW_REG_STATUS_DONE (1 << 11)
-#define VCE_LMI_FW_START_KEYSEL 0x20e18
-#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20
-#define VCE_LMI_CTRL2 0x20e74
-#define VCE_LMI_CTRL 0x20e98
-#define VCE_LMI_VM_CTRL 0x20ea0
-#define VCE_LMI_SWAP_CNTL 0x20eb4
-#define VCE_LMI_SWAP_CNTL1 0x20eb8
-#define VCE_LMI_CACHE_CTRL 0x20ef4
-
+/* VCE */
#define VCE_CMD_NO_OP 0x00000000
#define VCE_CMD_END 0x00000001
#define VCE_CMD_IB 0x00000002
@@ -1988,434 +590,146 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
-
//#dce stupp
/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
-#define SI_CRTC0_REGISTER_OFFSET 0 //(0x6df0 - 0x6df0)/4
-#define SI_CRTC1_REGISTER_OFFSET 0x300 //(0x79f0 - 0x6df0)/4
-#define SI_CRTC2_REGISTER_OFFSET 0x2600 //(0x105f0 - 0x6df0)/4
-#define SI_CRTC3_REGISTER_OFFSET 0x2900 //(0x111f0 - 0x6df0)/4
-#define SI_CRTC4_REGISTER_OFFSET 0x2c00 //(0x11df0 - 0x6df0)/4
-#define SI_CRTC5_REGISTER_OFFSET 0x2f00 //(0x129f0 - 0x6df0)/4
+#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c) //(0x6df0 - 0x6df0)/4
+#define CRTC1_REGISTER_OFFSET (0x1e7c - 0x1b7c) //(0x79f0 - 0x6df0)/4
+#define CRTC2_REGISTER_OFFSET (0x417c - 0x1b7c) //(0x105f0 - 0x6df0)/4
+#define CRTC3_REGISTER_OFFSET (0x447c - 0x1b7c) //(0x111f0 - 0x6df0)/4
+#define CRTC4_REGISTER_OFFSET (0x477c - 0x1b7c) //(0x11df0 - 0x6df0)/4
+#define CRTC5_REGISTER_OFFSET (0x4a7c - 0x1b7c) //(0x129f0 - 0x6df0)/4
+
+/* hpd instance offsets */
+#define HPD0_REGISTER_OFFSET (0x1807 - 0x1807)
+#define HPD1_REGISTER_OFFSET (0x180a - 0x1807)
+#define HPD2_REGISTER_OFFSET (0x180d - 0x1807)
+#define HPD3_REGISTER_OFFSET (0x1810 - 0x1807)
+#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
+#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
+
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
#define CURSOR_WIDTH 64
#define CURSOR_HEIGHT 64
-#define AMDGPU_MM_INDEX 0x0000
-#define AMDGPU_MM_DATA 0x0001
-
-#define VERDE_NUM_CRTC 6
-#define BLACKOUT_MODE_MASK 0x00000007
-#define VGA_RENDER_CONTROL 0xC0
-#define R_000300_VGA_RENDER_CONTROL 0xC0
-#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
-#define EVERGREEN_CRTC_STATUS 0x1BA3
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
-/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
-#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
-#define EVERGREEN_CRTC_CONTROL 0x1b9c
-#define EVERGREEN_CRTC_MASTER_EN (1 << 0)
-#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
-#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
-#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
-#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
-#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
-#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-
-#define EVERGREEN_DATA_FORMAT 0x1ac0
-# define EVERGREEN_INTERLEAVE_EN (1 << 0)
-
-#define MC_SHARED_CHMAP__NOOFCHAN_MASK 0xf000
-#define MC_SHARED_CHMAP__NOOFCHAN__SHIFT 0xc
-
-#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20)
-#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20)
-#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20)
-#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20)
-
-#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45
-#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845
-
-#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847
-#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47
-
-#define DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK 0x8
-
-#define DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK 0x4
-
-#define DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK 0x20000
-
-#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK 0x1
-#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK 0x100
-
-#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK 0x1
-
-#define R600_D1GRPH_SWAP_CONTROL 0x1843
-#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0)
-
-#define AVIVO_D1VGA_CONTROL 0x00cc
-# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0)
-# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8)
-# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9)
-# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10)
-# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16)
-# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24)
-#define AVIVO_D2VGA_CONTROL 0x00ce
-
-#define R600_BUS_CNTL 0x1508
-# define R600_BIOS_ROM_DIS (1 << 1)
+
#define R600_ROM_CNTL 0x580
# define R600_SCK_OVERWRITE (1 << 1)
# define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
# define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28)
-#define GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK 0x1
-
-#define FMT_BIT_DEPTH_CONTROL 0x1bf2
-#define FMT_TRUNCATE_EN (1 << 0)
-#define FMT_TRUNCATE_DEPTH (1 << 4)
-#define FMT_SPATIAL_DITHER_EN (1 << 8)
-#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9)
-#define FMT_SPATIAL_DITHER_DEPTH (1 << 12)
-#define FMT_FRAME_RANDOM_ENABLE (1 << 13)
-#define FMT_RGB_RANDOM_ENABLE (1 << 14)
-#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15)
-#define FMT_TEMPORAL_DITHER_EN (1 << 16)
-#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20)
-#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21)
-#define FMT_TEMPORAL_LEVEL (1 << 24)
-#define FMT_TEMPORAL_DITHER_RESET (1 << 25)
-#define FMT_25FRC_SEL(x) ((x) << 26)
-#define FMT_50FRC_SEL(x) ((x) << 28)
-#define FMT_75FRC_SEL(x) ((x) << 30)
-
-#define EVERGREEN_DC_LUT_CONTROL 0x1a80
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86
-#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c
-#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79
-#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e
-#define EVERGREEN_DC_LUT_RW_MODE 0x1a78
-
-#define EVERGREEN_GRPH_ENABLE 0x1a00
-#define EVERGREEN_GRPH_CONTROL 0x1a01
-#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0)
-#define EVERGREEN_GRPH_DEPTH_8BPP 0
-#define EVERGREEN_GRPH_DEPTH_16BPP 1
-#define EVERGREEN_GRPH_DEPTH_32BPP 2
-#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
-#define EVERGREEN_ADDR_SURF_2_BANK 0
-#define EVERGREEN_ADDR_SURF_4_BANK 1
-#define EVERGREEN_ADDR_SURF_8_BANK 2
-#define EVERGREEN_ADDR_SURF_16_BANK 3
-#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4)
-#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3
-#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8)
-
-#define EVERGREEN_GRPH_FORMAT_INDEXED 0
-#define EVERGREEN_GRPH_FORMAT_ARGB1555 0
-#define EVERGREEN_GRPH_FORMAT_ARGB565 1
-#define EVERGREEN_GRPH_FORMAT_ARGB4444 2
-#define EVERGREEN_GRPH_FORMAT_AI88 3
-#define EVERGREEN_GRPH_FORMAT_MONO16 4
-#define EVERGREEN_GRPH_FORMAT_BGRA5551 5
+#define GRPH_ARRAY_LINEAR_GENERAL 0
+#define GRPH_ARRAY_LINEAR_ALIGNED 1
+#define GRPH_ARRAY_1D_TILED_THIN1 2
+#define GRPH_ARRAY_2D_TILED_THIN1 4
+
+#define ES_AND_GS_AUTO 3
+#define BUF_SWAP_32BIT (2 << 16)
+
+#define GRPH_DEPTH_8BPP 0
+#define GRPH_DEPTH_16BPP 1
+#define GRPH_DEPTH_32BPP 2
+
+/* 8 BPP */
+#define GRPH_FORMAT_INDEXED 0
+
+/* 16 BPP */
+#define GRPH_FORMAT_ARGB1555 0
+#define GRPH_FORMAT_ARGB565 1
+#define GRPH_FORMAT_ARGB4444 2
+#define GRPH_FORMAT_AI88 3
+#define GRPH_FORMAT_MONO16 4
+#define GRPH_FORMAT_BGRA5551 5
/* 32 BPP */
-#define EVERGREEN_GRPH_FORMAT_ARGB8888 0
-#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1
-#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2
-#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3
-#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4
-#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5
-#define EVERGREEN_GRPH_FORMAT_RGB111110 6
-#define EVERGREEN_GRPH_FORMAT_BGR101111 7
-#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3
-#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6
-#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
-#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0
-#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1
-#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2
-#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
-
-#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03
-#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
-# define EVERGREEN_GRPH_ENDIAN_NONE 0
-# define EVERGREEN_GRPH_ENDIAN_8IN16 1
-# define EVERGREEN_GRPH_ENDIAN_8IN32 2
-# define EVERGREEN_GRPH_ENDIAN_8IN64 3
-#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
-# define EVERGREEN_GRPH_RED_SEL_R 0
-# define EVERGREEN_GRPH_RED_SEL_G 1
-# define EVERGREEN_GRPH_RED_SEL_B 2
-# define EVERGREEN_GRPH_RED_SEL_A 3
-#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
-# define EVERGREEN_GRPH_GREEN_SEL_G 0
-# define EVERGREEN_GRPH_GREEN_SEL_B 1
-# define EVERGREEN_GRPH_GREEN_SEL_A 2
-# define EVERGREEN_GRPH_GREEN_SEL_R 3
-#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
-# define EVERGREEN_GRPH_BLUE_SEL_B 0
-# define EVERGREEN_GRPH_BLUE_SEL_A 1
-# define EVERGREEN_GRPH_BLUE_SEL_R 2
-# define EVERGREEN_GRPH_BLUE_SEL_G 3
-#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
-# define EVERGREEN_GRPH_ALPHA_SEL_A 0
-# define EVERGREEN_GRPH_ALPHA_SEL_R 1
-# define EVERGREEN_GRPH_ALPHA_SEL_G 2
-# define EVERGREEN_GRPH_ALPHA_SEL_B 3
-
-#define EVERGREEN_D3VGA_CONTROL 0xf8
-#define EVERGREEN_D4VGA_CONTROL 0xf9
-#define EVERGREEN_D5VGA_CONTROL 0xfa
-#define EVERGREEN_D6VGA_CONTROL 0xfb
-
-#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00
-
-#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02
-#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8)
-
-#define EVERGREEN_GRPH_PITCH 0x1a06
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09
-#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a
-#define EVERGREEN_GRPH_X_START 0x1a0b
-#define EVERGREEN_GRPH_Y_START 0x1a0c
-#define EVERGREEN_GRPH_X_END 0x1a0d
-#define EVERGREEN_GRPH_Y_END 0x1a0e
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12
-#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0)
-
-#define EVERGREEN_VIEWPORT_START 0x1b5c
-#define EVERGREEN_VIEWPORT_SIZE 0x1b5d
-#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1
-
-/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */
-#define EVERGREEN_CUR_CONTROL 0x1a66
-# define EVERGREEN_CURSOR_EN (1 << 0)
-# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8)
-# define EVERGREEN_CURSOR_MONO 0
-# define EVERGREEN_CURSOR_24_1 1
-# define EVERGREEN_CURSOR_24_8_PRE_MULT 2
-# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3
-# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16)
-# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20)
-# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
-# define EVERGREEN_CURSOR_URGENT_ALWAYS 0
-# define EVERGREEN_CURSOR_URGENT_1_8 1
-# define EVERGREEN_CURSOR_URGENT_1_4 2
-# define EVERGREEN_CURSOR_URGENT_3_8 3
-# define EVERGREEN_CURSOR_URGENT_1_2 4
-#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67
-# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000
-#define EVERGREEN_CUR_SIZE 0x1a68
-#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69
-#define EVERGREEN_CUR_POSITION 0x1a6a
-#define EVERGREEN_CUR_HOT_SPOT 0x1a6b
-#define EVERGREEN_CUR_COLOR1 0x1a6c
-#define EVERGREEN_CUR_COLOR2 0x1a6d
-#define EVERGREEN_CUR_UPDATE 0x1a6e
-# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0)
-# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1)
-# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16)
-# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
-
-
-#define NI_INPUT_CSC_CONTROL 0x1a35
-# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0)
-# define NI_INPUT_CSC_BYPASS 0
-# define NI_INPUT_CSC_PROG_COEFF 1
-# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2
-# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4)
-
-#define NI_OUTPUT_CSC_CONTROL 0x1a3c
-# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0)
-# define NI_OUTPUT_CSC_BYPASS 0
-# define NI_OUTPUT_CSC_TV_RGB 1
-# define NI_OUTPUT_CSC_YCBCR_601 2
-# define NI_OUTPUT_CSC_YCBCR_709 3
-# define NI_OUTPUT_CSC_PROG_COEFF 4
-# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5
-# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4)
-
-#define NI_DEGAMMA_CONTROL 0x1a58
-# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0)
-# define NI_DEGAMMA_BYPASS 0
-# define NI_DEGAMMA_SRGB_24 1
-# define NI_DEGAMMA_XVYCC_222 2
-# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4)
-# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
-# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12)
-
-#define NI_GAMUT_REMAP_CONTROL 0x1a59
-# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0)
-# define NI_GAMUT_REMAP_BYPASS 0
-# define NI_GAMUT_REMAP_PROG_COEFF 1
-# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2
-# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3
-# define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4)
-
-#define NI_REGAMMA_CONTROL 0x1aa0
-# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0)
-# define NI_REGAMMA_BYPASS 0
-# define NI_REGAMMA_SRGB_24 1
-# define NI_REGAMMA_XVYCC_222 2
-# define NI_REGAMMA_PROG_A 3
-# define NI_REGAMMA_PROG_B 4
-# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4)
-
-
-#define NI_PRESCALE_GRPH_CONTROL 0x1a2d
-# define NI_GRPH_PRESCALE_BYPASS (1 << 4)
-
-#define NI_PRESCALE_OVL_CONTROL 0x1a31
-# define NI_OVL_PRESCALE_BYPASS (1 << 4)
-
-#define NI_INPUT_GAMMA_CONTROL 0x1a10
-# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0)
-# define NI_INPUT_GAMMA_USE_LUT 0
-# define NI_INPUT_GAMMA_BYPASS 1
-# define NI_INPUT_GAMMA_SRGB_24 2
-# define NI_INPUT_GAMMA_XVYCC_222 3
-# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4)
-
-#define BLACKOUT_MODE_MASK 0x00000007
-#define VGA_RENDER_CONTROL 0xC0
-#define R_000300_VGA_RENDER_CONTROL 0xC0
-#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
-#define EVERGREEN_CRTC_STATUS 0x1BA3
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
-/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
-#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
-#define EVERGREEN_CRTC_CONTROL 0x1b9c
-# define EVERGREEN_CRTC_MASTER_EN (1 << 0)
-# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
-#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
-# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
-# define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
-#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
-#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
-#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-
-#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10
-#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4
-#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80
-#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7
-#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400
-#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa
-#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000
-#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd
-#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000
-#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10
-#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000
-#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13
-
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18
-
-#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7
-#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0
-
-#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1
-#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0
-#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2
-#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1
-
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb
-
-#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8
-#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x3
-#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40
-#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x6
-#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x200
-#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x9
-#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x1000
-#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xc
-#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8000
-#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xf
-#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40000
-#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x12
+#define GRPH_FORMAT_ARGB8888 0
+#define GRPH_FORMAT_ARGB2101010 1
+#define GRPH_FORMAT_32BPP_DIG 2
+#define GRPH_FORMAT_8B_ARGB2101010 3
+#define GRPH_FORMAT_BGRA1010102 4
+#define GRPH_FORMAT_8B_BGRA1010102 5
+#define GRPH_FORMAT_RGB111110 6
+#define GRPH_FORMAT_BGR101111 7
+
+#define GRPH_ENDIAN_NONE 0
+#define GRPH_ENDIAN_8IN16 1
+#define GRPH_ENDIAN_8IN32 2
+#define GRPH_ENDIAN_8IN64 3
+#define GRPH_RED_SEL_R 0
+#define GRPH_RED_SEL_G 1
+#define GRPH_RED_SEL_B 2
+#define GRPH_RED_SEL_A 3
+
+#define GRPH_GREEN_SEL_G 0
+#define GRPH_GREEN_SEL_B 1
+#define GRPH_GREEN_SEL_A 2
+#define GRPH_GREEN_SEL_R 3
+
+#define GRPH_BLUE_SEL_B 0
+#define GRPH_BLUE_SEL_A 1
+#define GRPH_BLUE_SEL_R 2
+#define GRPH_BLUE_SEL_G 3
+
+#define GRPH_ALPHA_SEL_A 0
+#define GRPH_ALPHA_SEL_R 1
+#define GRPH_ALPHA_SEL_G 2
+#define GRPH_ALPHA_SEL_B 3
+
+/* CUR_CONTROL */
+ #define CURSOR_MONO 0
+ #define CURSOR_24_1 1
+ #define CURSOR_24_8_PRE_MULT 2
+ #define CURSOR_24_8_UNPRE_MULT 3
+ #define CURSOR_URGENT_ALWAYS 0
+ #define CURSOR_URGENT_1_8 1
+ #define CURSOR_URGENT_1_4 2
+ #define CURSOR_URGENT_3_8 3
+ #define CURSOR_URGENT_1_2 4
+
+/* INPUT_CSC_CONTROL */
+# define INPUT_CSC_BYPASS 0
+# define INPUT_CSC_PROG_COEFF 1
+# define INPUT_CSC_PROG_SHARED_MATRIXA 2
+
+/* OUTPUT_CSC_CONTROL */
+# define OUTPUT_CSC_BYPASS 0
+# define OUTPUT_CSC_TV_RGB 1
+# define OUTPUT_CSC_YCBCR_601 2
+# define OUTPUT_CSC_YCBCR_709 3
+# define OUTPUT_CSC_PROG_COEFF 4
+# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5
+
+/* DEGAMMA_CONTROL */
+# define DEGAMMA_BYPASS 0
+# define DEGAMMA_SRGB_24 1
+# define DEGAMMA_XVYCC_222 2
+
+/* GAMUT_REMAP_CONTROL */
+# define GAMUT_REMAP_BYPASS 0
+# define GAMUT_REMAP_PROG_COEFF 1
+# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2
+# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3
+
+/* REGAMMA_CONTROL */
+# define REGAMMA_BYPASS 0
+# define REGAMMA_SRGB_24 1
+# define REGAMMA_XVYCC_222 2
+# define REGAMMA_PROG_A 3
+# define REGAMMA_PROG_B 4
+
+
+/* INPUT_GAMMA_CONTROL */
+# define INPUT_GAMMA_USE_LUT 0
+# define INPUT_GAMMA_BYPASS 1
+# define INPUT_GAMMA_SRGB_24 2
+# define INPUT_GAMMA_XVYCC_222 3
#define MC_SEQ_MISC0__MT__MASK 0xf0000000
#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
@@ -2426,28 +740,14 @@
#define MC_SEQ_MISC0__MT__HBM 0x60000000
#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
-#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000
#define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000
-#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000
-#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK 0x400000
#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-#define CONFIG_CNTL 0x1509
-#define CC_DRM_ID_STRAPS 0X1559
#define AMDGPU_PCIE_INDEX 0xc
#define AMDGPU_PCIE_DATA 0xd
-#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411
-#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412
-#define DMA_MODE 0x342f
-#define DMA_RB_RPTR_ADDR_HI 0x3407
-#define DMA_RB_RPTR_ADDR_LO 0x3408
-#define DMA_BUSY_MASK 0x20
-#define DMA1_BUSY_MASK 0X40
-#define SDMA_MAX_INSTANCE 2
-
#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)
#define PCIE_PORT_INDEX 0xe
@@ -2457,8 +757,6 @@
#define EVERGREEN_PIF_PHY1_INDEX 0x10
#define EVERGREEN_PIF_PHY1_DATA 0x14
-#define MC_VM_FB_OFFSET 0x81a
-
/* Discrete VCE clocks */
#define CG_VCEPLL_FUNC_CNTL 0xc0030600
#define VCEPLL_RESET_MASK 0x00000001
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
new file mode 100644
index 000000000000..2594467bdd87
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "sienna_cichlid.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_xgmi.h"
+
+static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+#if 0
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 0, 7) &&
+ adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev))
+ return true;
+#endif
+ return amdgpu_reset_method == AMD_RESET_METHOD_MODE2;
+}
+
+static struct amdgpu_reset_handler *
+sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *handler;
+ int i;
+
+ if (reset_context->method != AMD_RESET_METHOD_NONE) {
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_context->method)
+ return handler;
+ }
+ }
+
+ if (sienna_cichlid_is_mode2_default(reset_ctl)) {
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == AMD_RESET_METHOD_MODE2)
+ return handler;
+ }
+ }
+
+ return NULL;
+}
+
+static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int
+sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->gfxhub.funcs->mode2_save_regs)
+ adev->gfxhub.funcs->mode2_save_regs(adev);
+ if (adev->gfxhub.funcs->halt)
+ adev->gfxhub.funcs->halt(adev);
+ r = sienna_cichlid_mode2_suspend_ip(adev);
+ }
+
+ return r;
+}
+
+static void sienna_cichlid_async_reset(struct work_struct *work)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_reset_control *reset_ctl =
+ container_of(work, struct amdgpu_reset_control, reset_work);
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_ctl->active_reset) {
+ dev_dbg(adev->dev, "Resetting device\n");
+ handler->do_reset(adev);
+ break;
+ }
+ }
+}
+
+static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev)
+{
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ return amdgpu_dpm_mode2_reset(adev);
+}
+
+static int
+sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int r;
+
+ r = sienna_cichlid_mode2_reset(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "ASIC reset failed with error, %d ", r);
+ }
+ return r;
+}
+
+static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
+{
+ int i, r;
+ struct psp_context *psp = &adev->psp;
+
+ r = psp_rlc_autoload_start(psp);
+ if (r) {
+ dev_err(adev->dev, "Failed to start rlc autoload\n");
+ return r;
+ }
+
+ /* Reinit GFXHUB */
+ if (adev->gfxhub.funcs->mode2_restore_regs)
+ adev->gfxhub.funcs->mode2_restore_regs(adev);
+ adev->gfxhub.funcs->init(adev);
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r) {
+ dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(
+ &adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d after reset\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ return r;
+}
+
+static int
+sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r;
+ struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+ dev_info(tmp_adev->dev,
+ "GPU reset succeeded, trying to resume\n");
+ r = sienna_cichlid_mode2_restore_ip(tmp_adev);
+ if (r)
+ goto end;
+
+ /*
+ * Add this ASIC as tracked as reset was already
+ * complete successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ /* Resume RAS */
+ amdgpu_ras_resume(tmp_adev);
+
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ goto end;
+ }
+
+end:
+ if (r)
+ return -EAGAIN;
+ else
+ return r;
+}
+
+static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = sienna_cichlid_mode2_prepare_hwcontext,
+ .perform_reset = sienna_cichlid_mode2_perform_reset,
+ .restore_hwcontext = sienna_cichlid_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = sienna_cichlid_mode2_reset,
+};
+
+static struct amdgpu_reset_handler
+ *sienna_cichlid_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &sienna_cichlid_mode2_handler,
+ };
+
+int sienna_cichlid_reset_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_reset_control *reset_ctl;
+
+ reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
+ if (!reset_ctl)
+ return -ENOMEM;
+
+ reset_ctl->handle = adev;
+ reset_ctl->async_reset = sienna_cichlid_async_reset;
+ reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
+ reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler;
+
+ INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
+ /* Only mode2 is handled through reset control now */
+ reset_ctl->reset_handlers = &sienna_cichlid_rst_handlers;
+ adev->reset_cntl = reset_ctl;
+
+ return 0;
+}
+
+int sienna_cichlid_reset_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->reset_cntl);
+ adev->reset_cntl = NULL;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h
new file mode 100644
index 000000000000..5213b162dacd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SIENNA_CICHLID_H__
+#define __SIENNA_CICHLID_H__
+
+#include "amdgpu.h"
+
+int sienna_cichlid_reset_init(struct amdgpu_device *adev);
+int sienna_cichlid_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c
deleted file mode 100644
index 5ee69f70c49b..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#include "amdgpu.h"
-#include "nv.h"
-
-#include "soc15_common.h"
-#include "soc15_hw_ip.h"
-#include "sienna_cichlid_ip_offset.h"
-
-int sienna_cichlid_reg_base_init(struct amdgpu_device *adev)
-{
- /* HW has more IP blocks, only initialized the blocke needed by driver */
- uint32_t i;
- for (i = 0 ; i < MAX_INSTANCE ; ++i) {
- adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
- adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
- adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
- adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
- adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
- adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
- adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
- adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
- adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i]));
- adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
- adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
- adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
- }
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index 5c7d769aee3f..68aef47254a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -26,6 +26,7 @@
#include "smu_v11_0_i2c.h"
#include "amdgpu.h"
+#include "amdgpu_dpm.h"
#include "soc15_common.h"
#include <drm/drm_fixed.h>
#include <drm/drm_drv.h>
@@ -41,32 +42,67 @@
#define I2C_SW_TIMEOUT 8
#define I2C_ABORT 0x10
-/* I2C transaction flags */
-#define I2C_NO_STOP 1
-#define I2C_RESTART 2
-
-#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c))
+#define I2C_X_RESTART BIT(31)
static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT);
reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 1 : 0);
WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg);
}
+/* The T_I2C_POLL_US is defined as follows:
+ *
+ * "Define a timer interval (t_i2c_poll) equal to 10 times the
+ * signalling period for the highest I2C transfer speed used in the
+ * system and supported by DW_apb_i2c. For instance, if the highest
+ * I2C data transfer mode is 400 kb/s, then t_i2c_poll is 25 us." --
+ * DesignWare DW_apb_i2c Databook, Version 1.21a, section 3.8.3.1,
+ * page 56, with grammar and syntax corrections.
+ *
+ * Vcc for our device is at 1.8V which puts it at 400 kHz,
+ * see Atmel AT24CM02 datasheet, section 8.3 DC Characteristics table, page 14.
+ *
+ * The procedure to disable the IP block is described in section
+ * 3.8.3 Disabling DW_apb_i2c on page 56.
+ */
+#define I2C_SPEED_MODE_FAST 2
+#define T_I2C_POLL_US 25
+#define I2C_MAX_T_POLL_COUNT 1000
-static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable)
+static int smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0);
+
+ if (!enable) {
+ int ii;
+
+ for (ii = I2C_MAX_T_POLL_COUNT; ii > 0; ii--) {
+ u32 en_stat = RREG32_SOC15(SMUIO,
+ 0,
+ mmCKSVII2C_IC_ENABLE_STATUS);
+ if (REG_GET_FIELD(en_stat, CKSVII2C_IC_ENABLE_STATUS, IC_EN))
+ udelay(T_I2C_POLL_US);
+ else
+ return I2C_OK;
+ }
+
+ return I2C_ABORT;
+ }
+
+ return I2C_OK;
}
static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/* do */
{
RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR);
@@ -76,15 +112,21 @@ static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control)
static void smu_v11_0_i2c_configure(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t reg = 0;
reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1);
reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1);
reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0);
reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0);
- /* Standard mode */
- reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2);
+ /* The values of IC_MAX_SPEED_MODE are,
+ * 1: standard mode, 0 - 100 Kb/s,
+ * 2: fast mode, <= 400 Kb/s, or fast mode plus, <= 1000 Kb/s,
+ * 3: high speed mode, <= 3.4 Mb/s.
+ */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE,
+ I2C_SPEED_MODE_FAST);
reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1);
WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg);
@@ -92,7 +134,8 @@ static void smu_v11_0_i2c_configure(struct i2c_adapter *control)
static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/*
* Standard mode speed, These values are taken from SMUIO MAS,
@@ -113,18 +156,22 @@ static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control)
WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20);
}
-static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address)
+static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, u16 address)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
- /* Convert fromr 8-bit to 7-bit address */
- address >>= 1;
- WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF));
+ /* The IC_TAR::IC_TAR field is 10-bits wide.
+ * It takes a 7-bit or 10-bit addresses as an address,
+ * i.e. no read/write bit--no wire format, just the address.
+ */
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, address & 0x3FF);
}
static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t ret = I2C_OK;
uint32_t reg, reg_c_tx_abrt_source;
@@ -175,7 +222,8 @@ static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control)
static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t ret = I2C_OK;
uint32_t reg_ic_status, reg_c_tx_abrt_source;
@@ -206,9 +254,6 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control)
return ret;
}
-
-
-
/**
* smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device.
*
@@ -221,17 +266,18 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control)
* Returns 0 on success or error.
*/
static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
- uint8_t address, uint8_t *data,
- uint32_t numbytes, uint32_t i2c_flag)
+ u16 address, u8 *data,
+ u32 numbytes, u32 i2c_flag)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
- uint32_t bytes_sent, reg, ret = 0;
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ u32 bytes_sent, reg, ret = I2C_OK;
unsigned long timeout_counter;
bytes_sent = 0;
DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ",
- (uint16_t)address, numbytes);
+ address, numbytes);
if (drm_debug_enabled(DRM_UT_DRIVER)) {
print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
@@ -246,53 +292,49 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
/* Clear status bits */
smu_v11_0_i2c_clear_status(control);
-
timeout_counter = jiffies + msecs_to_jiffies(20);
while (numbytes > 0) {
reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
- if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) {
- do {
- reg = 0;
- /*
- * Prepare transaction, no need to set RESTART. I2C engine will send
- * START as soon as it sees data in TXFIFO
- */
- if (bytes_sent == 0)
- reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART,
- (i2c_flag & I2C_RESTART) ? 1 : 0);
- reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]);
-
- /* determine if we need to send STOP bit or not */
- if (numbytes == 1)
- /* Final transaction, so send stop unless I2C_NO_STOP */
- reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP,
- (i2c_flag & I2C_NO_STOP) ? 0 : 1);
- /* Write */
- reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0);
- WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
-
- /* Record that the bytes were transmitted */
- bytes_sent++;
- numbytes--;
-
- reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
-
- } while (numbytes && REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF));
- }
-
- /*
- * We waited too long for the transmission FIFO to become not-full.
- * Exit the loop with error.
- */
- if (time_after(jiffies, timeout_counter)) {
- ret |= I2C_SW_TIMEOUT;
- goto Err;
+ if (!REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) {
+ /*
+ * We waited for too long for the transmission
+ * FIFO to become not-full. Exit the loop
+ * with error.
+ */
+ if (time_after(jiffies, timeout_counter)) {
+ ret |= I2C_SW_TIMEOUT;
+ goto Err;
+ }
+ } else {
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT,
+ data[bytes_sent]);
+
+ /* Final message, final byte, must generate a
+ * STOP to release the bus, i.e. don't hold
+ * SCL low.
+ */
+ if (numbytes == 1 && i2c_flag & I2C_M_STOP)
+ reg = REG_SET_FIELD(reg,
+ CKSVII2C_IC_DATA_CMD,
+ STOP, 1);
+
+ if (bytes_sent == 0 && i2c_flag & I2C_X_RESTART)
+ reg = REG_SET_FIELD(reg,
+ CKSVII2C_IC_DATA_CMD,
+ RESTART, 1);
+
+ /* Write */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
+
+ /* Record that the bytes were transmitted */
+ bytes_sent++;
+ numbytes--;
}
}
ret = smu_v11_0_i2c_poll_tx_status(control);
-
Err:
/* Any error, no point in proceeding */
if (ret != I2C_OK) {
@@ -323,10 +365,11 @@ Err:
* Returns 0 on success or error.
*/
static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
- uint8_t address, uint8_t *data,
- uint32_t numbytes, uint8_t i2c_flag)
+ u16 address, u8 *data,
+ u32 numbytes, u32 i2c_flag)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t bytes_received, ret = I2C_OK;
bytes_received = 0;
@@ -342,23 +385,21 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
smu_v11_0_i2c_clear_status(control);
-
/* Prepare transaction */
-
- /* Each time we disable I2C, so this is not a restart */
- if (bytes_received == 0)
- reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART,
- (i2c_flag & I2C_RESTART) ? 1 : 0);
-
reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0);
/* Read */
reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1);
- /* Transmitting last byte */
- if (numbytes == 1)
- /* Final transaction, so send stop if requested */
- reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP,
- (i2c_flag & I2C_NO_STOP) ? 0 : 1);
+ /* Final message, final byte, must generate a STOP
+ * to release the bus, i.e. don't hold SCL low.
+ */
+ if (numbytes == 1 && i2c_flag & I2C_M_STOP)
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD,
+ STOP, 1);
+
+ if (bytes_received == 0 && i2c_flag & I2C_X_RESTART)
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD,
+ RESTART, 1);
WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
@@ -399,7 +440,8 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
static void smu_v11_0_i2c_abort(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t reg = 0;
/* Enable I2C engine; */
@@ -413,10 +455,10 @@ static void smu_v11_0_i2c_abort(struct i2c_adapter *control)
DRM_DEBUG_DRIVER("I2C_Abort() Done.");
}
-
static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
const uint32_t IDLE_TIMEOUT = 1024;
uint32_t timeout_count = 0;
@@ -425,7 +467,6 @@ static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control)
reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
-
if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) &&
(REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) {
/*
@@ -455,6 +496,8 @@ static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control)
static void smu_v11_0_i2c_init(struct i2c_adapter *control)
{
+ int res;
+
/* Disable clock gating */
smu_v11_0_i2c_set_clock_gating(control, false);
@@ -462,7 +505,9 @@ static void smu_v11_0_i2c_init(struct i2c_adapter *control)
DRM_WARN("I2C busy !");
/* Disable I2C */
- smu_v11_0_i2c_enable(control, false);
+ res = smu_v11_0_i2c_enable(control, false);
+ if (res != I2C_OK)
+ smu_v11_0_i2c_abort(control);
/* Configure I2C to operate as master and in standard mode */
smu_v11_0_i2c_configure(control);
@@ -474,22 +519,24 @@ static void smu_v11_0_i2c_init(struct i2c_adapter *control)
static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
- uint32_t reg_ic_enable_status, reg_ic_enable;
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ u32 status, enable, en_stat;
+ int res;
- smu_v11_0_i2c_enable(control, false);
+ res = smu_v11_0_i2c_enable(control, false);
+ if (res != I2C_OK) {
+ status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+ enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
+ en_stat = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
- /* Double check if disabled, else force abort */
- reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
- reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
-
- if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) &&
- (REG_GET_FIELD(reg_ic_enable_status,
- CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) {
- /*
- * Nobody is using I2C engine, but engine remains active because
- * someone missed to send STOP
+ /* Nobody is using the I2C engine, yet it remains
+ * active, possibly because someone missed to send
+ * STOP.
*/
+ DRM_DEBUG_DRIVER("Aborting from fini: status:0x%08x "
+ "enable:0x%08x enable_stat:0x%08x",
+ status, enable, en_stat);
smu_v11_0_i2c_abort(control);
}
@@ -508,7 +555,8 @@ static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/* Send PPSMC_MSG_RequestI2CBus */
if (!amdgpu_dpm_smu_i2c_bus_access(adev, true))
@@ -519,7 +567,8 @@ static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control)
static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/* Send PPSMC_MSG_ReleaseI2CBus */
if (!amdgpu_dpm_smu_i2c_bus_access(adev, false))
@@ -531,22 +580,12 @@ static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control)
/***************************** I2C GLUE ****************************/
static uint32_t smu_v11_0_i2c_read_data(struct i2c_adapter *control,
- uint8_t address,
- uint8_t *data,
- uint32_t numbytes)
+ struct i2c_msg *msg, uint32_t i2c_flag)
{
- uint32_t ret = 0;
-
- /* First 2 bytes are dummy write to set EEPROM address */
- ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP);
- if (ret != I2C_OK)
- goto Fail;
+ uint32_t ret;
- /* Now read data starting with that address */
- ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2,
- I2C_RESTART);
+ ret = smu_v11_0_i2c_receive(control, msg->addr, msg->buf, msg->len, i2c_flag);
-Fail:
if (ret != I2C_OK)
DRM_ERROR("ReadData() - I2C error occurred :%x", ret);
@@ -554,27 +593,14 @@ Fail:
}
static uint32_t smu_v11_0_i2c_write_data(struct i2c_adapter *control,
- uint8_t address,
- uint8_t *data,
- uint32_t numbytes)
+ struct i2c_msg *msg, uint32_t i2c_flag)
{
uint32_t ret;
- ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0);
+ ret = smu_v11_0_i2c_transmit(control, msg->addr, msg->buf, msg->len, i2c_flag);
if (ret != I2C_OK)
DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret);
- else
- /*
- * According to EEPROM spec there is a MAX of 10 ms required for
- * EEPROM to flush internal RX buffer after STOP was issued at the
- * end of write transaction. During this time the EEPROM will not be
- * responsive to any more commands - so wait a bit more.
- *
- * TODO Improve to wait for first ACK for slave address after
- * internal write cycle done.
- */
- msleep(10);
return ret;
@@ -582,14 +608,14 @@ static uint32_t smu_v11_0_i2c_write_data(struct i2c_adapter *control,
static void lock_bus(struct i2c_adapter *i2c, unsigned int flags)
{
- struct amdgpu_device *adev = to_amdgpu_device(i2c);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c);
+ struct amdgpu_device *adev = smu_i2c->adev;
- if (!smu_v11_0_i2c_bus_lock(i2c)) {
+ mutex_lock(&smu_i2c->mutex);
+ if (!smu_v11_0_i2c_bus_lock(i2c))
DRM_ERROR("Failed to lock the bus from SMU");
- return;
- }
-
- adev->pm.bus_locked = true;
+ else
+ adev->pm.bus_locked = true;
}
static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags)
@@ -600,14 +626,14 @@ static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags)
static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags)
{
- struct amdgpu_device *adev = to_amdgpu_device(i2c);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c);
+ struct amdgpu_device *adev = smu_i2c->adev;
- if (!smu_v11_0_i2c_bus_unlock(i2c)) {
+ if (!smu_v11_0_i2c_bus_unlock(i2c))
DRM_ERROR("Failed to unlock the bus from SMU");
- return;
- }
-
- adev->pm.bus_locked = false;
+ else
+ adev->pm.bus_locked = false;
+ mutex_unlock(&smu_i2c->mutex);
}
static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = {
@@ -617,27 +643,60 @@ static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = {
};
static int smu_v11_0_i2c_xfer(struct i2c_adapter *i2c_adap,
- struct i2c_msg *msgs, int num)
+ struct i2c_msg *msg, int num)
{
int i, ret;
- struct amdgpu_device *adev = to_amdgpu_device(i2c_adap);
-
- if (!adev->pm.bus_locked) {
- DRM_ERROR("I2C bus unlocked, stopping transaction!");
- return -EIO;
- }
+ u16 addr, dir;
smu_v11_0_i2c_init(i2c_adap);
+ /* From the client's point of view, this sequence of
+ * messages-- the array i2c_msg *msg, is a single transaction
+ * on the bus, starting with START and ending with STOP.
+ *
+ * The client is welcome to send any sequence of messages in
+ * this array, as processing under this function here is
+ * striving to be agnostic.
+ *
+ * Record the first address and direction we see. If either
+ * changes for a subsequent message, generate ReSTART. The
+ * DW_apb_i2c databook, v1.21a, specifies that ReSTART is
+ * generated when the direction changes, with the default IP
+ * block parameter settings, but it doesn't specify if ReSTART
+ * is generated when the address changes (possibly...). We
+ * don't rely on the default IP block parameter settings as
+ * the block is shared and they may change.
+ */
+ if (num > 0) {
+ addr = msg[0].addr;
+ dir = msg[0].flags & I2C_M_RD;
+ }
+
for (i = 0; i < num; i++) {
- if (msgs[i].flags & I2C_M_RD)
+ u32 i2c_flag = 0;
+
+ if (msg[i].addr != addr || (msg[i].flags ^ dir) & I2C_M_RD) {
+ addr = msg[i].addr;
+ dir = msg[i].flags & I2C_M_RD;
+ i2c_flag |= I2C_X_RESTART;
+ }
+
+ if (i == num - 1) {
+ /* Set the STOP bit on the last message, so
+ * that the IP block generates a STOP after
+ * the last byte of the message.
+ */
+ i2c_flag |= I2C_M_STOP;
+ }
+
+ if (msg[i].flags & I2C_M_RD)
ret = smu_v11_0_i2c_read_data(i2c_adap,
- (uint8_t)msgs[i].addr,
- msgs[i].buf, msgs[i].len);
+ msg + i,
+ i2c_flag);
else
ret = smu_v11_0_i2c_write_data(i2c_adap,
- (uint8_t)msgs[i].addr,
- msgs[i].buf, msgs[i].len);
+ msg + i,
+ i2c_flag);
if (ret != I2C_OK) {
num = -EIO;
@@ -654,34 +713,47 @@ static u32 smu_v11_0_i2c_func(struct i2c_adapter *adap)
return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
}
-
static const struct i2c_algorithm smu_v11_0_i2c_algo = {
.master_xfer = smu_v11_0_i2c_xfer,
.functionality = smu_v11_0_i2c_func,
};
-int smu_v11_0_i2c_control_init(struct i2c_adapter *control)
+static const struct i2c_adapter_quirks smu_v11_0_i2c_control_quirks = {
+ .flags = I2C_AQ_NO_ZERO_LEN,
+};
+
+int smu_v11_0_i2c_control_init(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[0];
+ struct i2c_adapter *control = &smu_i2c->adapter;
int res;
+ smu_i2c->adev = adev;
+ smu_i2c->port = 0;
+ mutex_init(&smu_i2c->mutex);
control->owner = THIS_MODULE;
- control->class = I2C_CLASS_SPD;
+ control->class = I2C_CLASS_HWMON;
control->dev.parent = &adev->pdev->dev;
control->algo = &smu_v11_0_i2c_algo;
- snprintf(control->name, sizeof(control->name), "AMDGPU SMU");
+ snprintf(control->name, sizeof(control->name), "AMDGPU SMU 0");
control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops;
+ control->quirks = &smu_v11_0_i2c_control_quirks;
+ i2c_set_adapdata(control, smu_i2c);
+
+ adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
+ adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res)
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
return res;
}
-void smu_v11_0_i2c_control_fini(struct i2c_adapter *control)
+void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev)
{
- i2c_del_adapter(control);
+ adev->pm.ras_eeprom_i2c_bus = NULL;
+ adev->pm.fru_eeprom_i2c_bus = NULL;
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
index 44467c05f642..96ad14288a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
@@ -26,9 +26,9 @@
#include <linux/types.h>
-struct i2c_adapter;
+struct amdgpu_device;
-int smu_v11_0_i2c_control_init(struct i2c_adapter *control);
-void smu_v11_0_i2c_control_fini(struct i2c_adapter *control);
+int smu_v11_0_i2c_control_init(struct amdgpu_device *adev);
+void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
new file mode 100644
index 000000000000..70569ea906bc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "smu_v13_0_10.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+
+static bool smu_v13_0_10_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ if (adev->pm.fw_version >= 0x00502005 && !amdgpu_sriov_vf(adev))
+ return true;
+
+ return false;
+}
+
+static struct amdgpu_reset_handler *
+smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ if (reset_context->method != AMD_RESET_METHOD_NONE) {
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_context->method)
+ return handler;
+ }
+ }
+
+ if (smu_v13_0_10_is_mode2_default(reset_ctl) &&
+ amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE2) {
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == AMD_RESET_METHOD_MODE2)
+ return handler;
+ }
+ }
+
+ return NULL;
+}
+
+static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_MES))
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int
+smu_v13_0_10_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (!amdgpu_sriov_vf(adev))
+ r = smu_v13_0_10_mode2_suspend_ip(adev);
+
+ return r;
+}
+
+static int smu_v13_0_10_mode2_reset(struct amdgpu_device *adev)
+{
+ return amdgpu_dpm_mode2_reset(adev);
+}
+
+static void smu_v13_0_10_async_reset(struct work_struct *work)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_reset_control *reset_ctl =
+ container_of(work, struct amdgpu_reset_control, reset_work);
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_ctl->active_reset) {
+ dev_dbg(adev->dev, "Resetting device\n");
+ handler->do_reset(adev);
+ break;
+ }
+ }
+}
+static int
+smu_v13_0_10_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int r;
+
+ r = smu_v13_0_10_mode2_reset(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "ASIC reset failed with error, %d ", r);
+ }
+ return r;
+}
+
+static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev)
+{
+ int i, r;
+ struct psp_context *psp = &adev->psp;
+ struct amdgpu_firmware_info *ucode;
+ struct amdgpu_firmware_info *ucode_list[2];
+ int ucode_count = 0;
+
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_IMU_I:
+ case AMDGPU_UCODE_ID_IMU_D:
+ ucode_list[ucode_count++] = ucode;
+ break;
+ default:
+ break;
+ }
+ }
+
+ r = psp_load_fw_list(psp, ucode_list, ucode_count);
+ if (r) {
+ dev_err(adev->dev, "IMU ucode load failed after mode2 reset\n");
+ return r;
+ }
+
+ r = psp_rlc_autoload_start(psp);
+ if (r) {
+ DRM_ERROR("Failed to start rlc autoload after mode2 reset\n");
+ return r;
+ }
+
+ amdgpu_dpm_enable_gfx_features(adev);
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_MES ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_MES ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(
+ &adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d after reset\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ return r;
+}
+
+static int
+smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r;
+ struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+ dev_info(tmp_adev->dev,
+ "GPU reset succeeded, trying to resume\n");
+ r = smu_v13_0_10_mode2_restore_ip(tmp_adev);
+ if (r)
+ goto end;
+
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ /* Resume RAS */
+ amdgpu_ras_resume(tmp_adev);
+
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ goto end;
+ }
+
+end:
+ if (r)
+ return -EAGAIN;
+ else
+ return r;
+}
+
+static struct amdgpu_reset_handler smu_v13_0_10_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = smu_v13_0_10_mode2_prepare_hwcontext,
+ .perform_reset = smu_v13_0_10_mode2_perform_reset,
+ .restore_hwcontext = smu_v13_0_10_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = smu_v13_0_10_mode2_reset,
+};
+
+static struct amdgpu_reset_handler
+ *smu_v13_0_10_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &smu_v13_0_10_mode2_handler,
+ };
+
+int smu_v13_0_10_reset_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_reset_control *reset_ctl;
+
+ reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
+ if (!reset_ctl)
+ return -ENOMEM;
+
+ reset_ctl->handle = adev;
+ reset_ctl->async_reset = smu_v13_0_10_async_reset;
+ reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
+ reset_ctl->get_reset_handler = smu_v13_0_10_get_reset_handler;
+
+ INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
+ /* Only mode2 is handled through reset control now */
+ reset_ctl->reset_handlers = &smu_v13_0_10_rst_handlers;
+
+ adev->reset_cntl = reset_ctl;
+
+ return 0;
+}
+
+int smu_v13_0_10_reset_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->reset_cntl);
+ adev->reset_cntl = NULL;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h
new file mode 100644
index 000000000000..e0cb72a0eec6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SMU_V13_0_10_H__
+#define __SMU_V13_0_10_H__
+
+#include "amdgpu.h"
+
+int smu_v13_0_10_reset_init(struct amdgpu_device *adev);
+int smu_v13_0_10_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
index e9c474c217ec..acdc40f99ab3 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
@@ -43,9 +43,12 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
if (adev->flags & AMD_IS_APU)
return;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ return;
+
def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ if (enable)
data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
else
@@ -56,7 +59,7 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
index 3a18dbb55c32..2afeb8b37f62 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
@@ -56,7 +56,7 @@ static void smuio_v11_0_6_update_rom_clock_gating(struct amdgpu_device *adev, bo
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
new file mode 100644
index 000000000000..bf8b8e5ddf5d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0.h"
+#include "smuio/smuio_13_0_2_offset.h"
+#include "smuio/smuio_13_0_2_sh_mask.h"
+
+#define SMUIO_MCM_CONFIG__HOST_GPU_XGMI_MASK 0x00000001L
+
+static u32 smuio_v13_0_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+static u32 smuio_v13_0_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+static void smuio_v13_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 def, data;
+
+ /* enable/disable ROM CG is not supported on APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ def = data = RREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
+ else
+ data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK;
+
+ if (def != data)
+ WREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0, data);
+}
+
+static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data;
+
+ /* CGTT_ROM_CLK_CTRL0 is not available for APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ data = RREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0);
+ if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
+ *flags |= AMD_CG_SUPPORT_ROM_MGCG;
+}
+
+/**
+ * smuio_v13_0_get_die_id - query die id from FCH.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns die id
+ */
+static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev)
+{
+ u32 data, die_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ die_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, DIE_ID);
+
+ return die_id;
+}
+
+/**
+ * smuio_v13_0_get_socket_id - query socket id from FCH
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns socket id
+ */
+static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev)
+{
+ u32 data, socket_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
+
+ return socket_id;
+}
+
+/**
+ * smuio_v13_0_is_host_gpu_xgmi_supported - detect xgmi interface between cpu and gpu/s.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true on success or false otherwise.
+ */
+static bool smuio_v13_0_is_host_gpu_xgmi_supported(struct amdgpu_device *adev)
+{
+ u32 data;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, TOPOLOGY_ID);
+ /* data[4:0]
+ * bit 0 == 0 host-gpu interface is PCIE
+ * bit 0 == 1 host-gpu interface is Alternate Protocal
+ * for AMD, this is XGMI
+ */
+ data &= SMUIO_MCM_CONFIG__HOST_GPU_XGMI_MASK;
+
+ return data ? true : false;
+}
+
+static enum amdgpu_pkg_type smuio_v13_0_get_pkg_type(struct amdgpu_device *adev)
+{
+ enum amdgpu_pkg_type pkg_type;
+ u32 data;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, TOPOLOGY_ID);
+
+ switch (data) {
+ case 0x4:
+ case 0xC:
+ pkg_type = AMDGPU_PKG_TYPE_CEM;
+ break;
+ default:
+ pkg_type = AMDGPU_PKG_TYPE_OAM;
+ break;
+ }
+
+ return pkg_type;
+}
+
+const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
+ .get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
+ .get_die_id = smuio_v13_0_get_die_id,
+ .get_socket_id = smuio_v13_0_get_socket_id,
+ .is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
+ .update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
+ .get_clock_gating_state = smuio_v13_0_get_clock_gating_state,
+ .get_pkg_type = smuio_v13_0_get_pkg_type,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.h
new file mode 100644
index 000000000000..a3bfe3e4fb46
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_H__
+#define __SMUIO_V13_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_funcs;
+
+#endif /* __SMUIO_V13_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
new file mode 100644
index 000000000000..5461b5289793
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0_3.h"
+#include "soc15_common.h"
+#include "smuio/smuio_13_0_3_offset.h"
+#include "smuio/smuio_13_0_3_sh_mask.h"
+
+#define PKG_TYPE_MASK 0x00000003L
+
+/**
+ * smuio_v13_0_3_get_die_id - query die id from FCH.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns die id
+ */
+static u32 smuio_v13_0_3_get_die_id(struct amdgpu_device *adev)
+{
+ u32 data, die_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ die_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, DIE_ID);
+
+ return die_id;
+}
+
+/**
+ * smuio_v13_0_3_get_socket_id - query socket id from FCH
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns socket id
+ */
+static u32 smuio_v13_0_3_get_socket_id(struct amdgpu_device *adev)
+{
+ u32 data, socket_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
+
+ return socket_id;
+}
+
+/**
+ * smuio_v13_0_3_get_pkg_type - query package type set by MP1/bootcode
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns package type
+ */
+
+static enum amdgpu_pkg_type smuio_v13_0_3_get_pkg_type(struct amdgpu_device *adev)
+{
+ enum amdgpu_pkg_type pkg_type;
+ u32 data;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, PKG_TYPE);
+ /* pkg_type[4:0]
+ *
+ * bit 1 == 1 APU form factor
+ *
+ * b0100 - b1111 - Reserved
+ */
+ switch (data & PKG_TYPE_MASK) {
+ case 0x0:
+ pkg_type = AMDGPU_PKG_TYPE_CEM;
+ break;
+ case 0x1:
+ pkg_type = AMDGPU_PKG_TYPE_OAM;
+ break;
+ case 0x2:
+ pkg_type = AMDGPU_PKG_TYPE_APU;
+ break;
+ default:
+ pkg_type = AMDGPU_PKG_TYPE_UNKNOWN;
+ break;
+ }
+
+ return pkg_type;
+}
+
+
+const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs = {
+ .get_die_id = smuio_v13_0_3_get_die_id,
+ .get_socket_id = smuio_v13_0_3_get_socket_id,
+ .get_pkg_type = smuio_v13_0_3_get_pkg_type,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h
new file mode 100644
index 000000000000..795f66c5a58b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_3_H__
+#define __SMUIO_V13_0_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs;
+
+#endif /* __SMUIO_V13_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c
new file mode 100644
index 000000000000..de998e328b08
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0_6.h"
+#include "smuio/smuio_13_0_6_offset.h"
+#include "smuio/smuio_13_0_6_sh_mask.h"
+
+static u32 smuio_v13_0_6_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+static u32 smuio_v13_0_6_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+const struct amdgpu_smuio_funcs smuio_v13_0_6_funcs = {
+ .get_rom_index_offset = smuio_v13_0_6_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v13_0_6_get_rom_data_offset,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h
new file mode 100644
index 000000000000..c75621de5ab5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_6_H__
+#define __SMUIO_V13_0_6_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_6_funcs;
+
+#endif /* __SMUIO_V13_0_6_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c
new file mode 100644
index 000000000000..2a51a70d4846
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v14_0_2.h"
+#include "smuio/smuio_14_0_2_offset.h"
+#include "smuio/smuio_14_0_2_sh_mask.h"
+#include <linux/preempt.h>
+
+static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+static u64 smuio_v14_0_2_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ u64 clock;
+ u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
+
+ preempt_disable();
+ clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ /* the clock counter may be udpated during polling the counters */
+ clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
+
+ clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
+
+ return clock;
+}
+
+const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = {
+ .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset,
+ .get_gpu_clock_counter = smuio_v14_0_2_get_gpu_clock_counter,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h
new file mode 100644
index 000000000000..6e617f832d90
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V14_0_2_H__
+#define __SMUIO_V14_0_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs;
+
+#endif /* __SMUIO_V14_0_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
index 8417890af227..c04fdd2d5b38 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
@@ -56,11 +56,11 @@ static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
- /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */
+ /* CGTT_ROM_CLK_CTRL0 is not available for APUs */
if (adev->flags & AMD_IS_APU)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 1221aa6b40a9..42f5d9c0e3af 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -25,8 +25,9 @@
#include <linux/module.h>
#include <linux/pci.h>
+#include <drm/amdgpu_drm.h>
+
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
@@ -71,9 +72,9 @@
#include "jpeg_v2_5.h"
#include "smuio_v9_0.h"
#include "smuio_v11_0.h"
-#include "dce_virtual.h"
+#include "smuio_v13_0.h"
+#include "amdgpu_vkms.h"
#include "mxgpu_ai.h"
-#include "amdgpu_smu.h"
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
@@ -83,45 +84,157 @@
#define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba
#define mmMP0_MISC_LIGHT_SLEEP_CTRL_BASE_IDX 0
-/*
- * Indirect registers accessor
- */
-static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
+static const struct amd_ip_funcs soc15_common_ip_funcs;
+
+/* Vega, Raven, Arcturus */
+static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] =
{
- unsigned long address, data;
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 0)},
+};
- return amdgpu_device_indirect_rreg(adev, address, data, reg);
-}
+static const struct amdgpu_video_codecs vega_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(vega_video_codecs_encode_array),
+ .codec_array = vega_video_codecs_encode_array,
+};
-static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+/* Vega */
+static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
{
- unsigned long address, data;
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+};
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
+static const struct amdgpu_video_codecs vega_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(vega_video_codecs_decode_array),
+ .codec_array = vega_video_codecs_decode_array,
+};
- amdgpu_device_indirect_wreg(adev, address, data, reg, v);
-}
+/* Raven */
+static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
+{
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
+};
-static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
+static const struct amdgpu_video_codecs rv_video_codecs_decode =
{
- unsigned long address, data;
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ .codec_count = ARRAY_SIZE(rv_video_codecs_decode_array),
+ .codec_array = rv_video_codecs_decode_array,
+};
- return amdgpu_device_indirect_rreg64(adev, address, data, reg);
-}
+/* Renoir, Arcturus */
+static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
+{
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
-static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
+static const struct amdgpu_video_codecs rn_video_codecs_decode =
{
- unsigned long address, data;
+ .codec_count = ARRAY_SIZE(rn_video_codecs_decode_array),
+ .codec_array = rn_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_3_video_codecs_decode_array),
+ .codec_array = vcn_4_0_3_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_encode_vcn0 = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_1_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_1_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_0_1_video_codecs_decode_array_vcn0,
+};
- amdgpu_device_indirect_wreg64(adev, address, data, reg, v);
+static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ if (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 1, 0):
+ if (encode)
+ *codecs = &vega_video_codecs_encode;
+ else
+ *codecs = &vega_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ if (encode)
+ *codecs = &vega_video_codecs_encode;
+ else
+ *codecs = &rv_video_codecs_decode;
+ return 0;
+ case IP_VERSION(2, 5, 0):
+ case IP_VERSION(2, 6, 0):
+ case IP_VERSION(2, 2, 0):
+ if (encode)
+ *codecs = &vega_video_codecs_encode;
+ else
+ *codecs = &rn_video_codecs_decode;
+ return 0;
+ case IP_VERSION(4, 0, 3):
+ if (encode)
+ *codecs = &vcn_4_0_3_video_codecs_encode;
+ else
+ *codecs = &vcn_4_0_3_video_codecs_decode;
+ return 0;
+ case IP_VERSION(5, 0, 1):
+ if (encode)
+ *codecs = &vcn_5_0_1_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_1_video_codecs_decode_vcn0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ }
}
static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
@@ -233,9 +346,14 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
{
u32 reference_clock = adev->clock.spll.reference_freq;
- if (adev->asic_type == CHIP_RENOIR)
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14))
return 10000;
- if (adev->asic_type == CHIP_RAVEN)
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 1))
return reference_clock / 4;
return reference_clock;
@@ -243,7 +361,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
void soc15_grbm_select(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 queue, u32 vmid)
+ u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id)
{
u32 grbm_gfx_cntl = 0;
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
@@ -251,12 +369,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
-}
-
-static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
+ WREG32_SOC15_RLC_SHADOW(GC, xcc_id, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
static bool soc15_read_disabled_bios(struct amdgpu_device *adev)
@@ -265,39 +378,6 @@ static bool soc15_read_disabled_bios(struct amdgpu_device *adev)
return false;
}
-static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
- u8 *bios, u32 length_bytes)
-{
- u32 *dw_ptr;
- u32 i, length_dw;
- uint32_t rom_index_offset;
- uint32_t rom_data_offset;
-
- if (bios == NULL)
- return false;
- if (length_bytes == 0)
- return false;
- /* APU vbios image is part of sbios image */
- if (adev->flags & AMD_IS_APU)
- return false;
-
- dw_ptr = (u32 *)bios;
- length_dw = ALIGN(length_bytes, 4) / 4;
-
- rom_index_offset =
- adev->smuio.funcs->get_rom_index_offset(adev);
- rom_data_offset =
- adev->smuio.funcs->get_rom_data_offset(adev);
-
- /* set rom index to 0 */
- WREG32(rom_index_offset, 0);
- /* read out the rom data */
- for (i = 0; i < length_dw; i++)
- dw_ptr[i] = RREG32(rom_data_offset);
-
- return true;
-}
-
static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
@@ -328,12 +408,12 @@ static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_n
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
}
@@ -362,8 +442,9 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
*value = 0;
for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
en = &soc15_allowed_read_registers[i];
- if (adev->reg_offset[en->hwip][en->inst] &&
- reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ en->reg_offset))
continue;
@@ -402,7 +483,9 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
if (entry->and_mask == 0xffffffff) {
tmp = entry->or_mask;
} else {
- tmp = RREG32(reg);
+ tmp = (entry->hwip == GC_HWIP) ?
+ RREG32_SOC15_IP(GC, reg) : RREG32(reg);
+
tmp &= ~(entry->and_mask);
tmp |= (entry->or_mask & entry->and_mask);
}
@@ -413,44 +496,11 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
WREG32_RLC(reg, tmp);
else
- WREG32(reg, tmp);
-
- }
-
-}
+ (entry->hwip == GC_HWIP) ?
+ WREG32_SOC15_IP(GC, reg, tmp) : WREG32(reg, tmp);
-static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
-{
- u32 i;
- int ret = 0;
-
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
-
- dev_info(adev->dev, "GPU mode1 reset\n");
-
- /* disable BM */
- pci_clear_master(adev->pdev);
-
- amdgpu_device_cache_pci_state(adev->pdev);
-
- ret = psp_gpu_reset(adev);
- if (ret)
- dev_err(adev->dev, "GPU mode1 reset failed\n");
-
- amdgpu_device_load_pci_state(adev->pdev);
-
- /* wait for asic to come out of reset */
- for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio.funcs->get_memsize(adev);
-
- if (memsize != 0xffffffff)
- break;
- udelay(1);
}
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
-
- return ret;
}
static int soc15_asic_baco_reset(struct amdgpu_device *adev)
@@ -459,7 +509,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
int ret = 0;
/* avoid NBIF got stuck when do RAS recovery in BACO reset */
- if (ras && ras->supported)
+ if (ras && adev->ras_enabled)
adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
ret = amdgpu_dpm_baco_reset(adev);
@@ -467,7 +517,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
return ret;
/* re-enable doorbell interrupt after BACO exit */
- if (ras && ras->supported)
+ if (ras && adev->ras_enabled)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
return 0;
@@ -476,39 +526,72 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
static enum amd_reset_method
soc15_asic_reset_method(struct amdgpu_device *adev)
{
- bool baco_reset = false;
+ int baco_reset = 0;
+ bool connected_to_cpu = false;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ if (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)
+ connected_to_cpu = true;
+
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO ||
- amdgpu_reset_method == AMD_RESET_METHOD_PCI)
- return amdgpu_reset_method;
+ amdgpu_reset_method == AMD_RESET_METHOD_PCI) {
+ /* If connected to cpu, driver only support mode2 */
+ if (connected_to_cpu)
+ return AMD_RESET_METHOD_MODE2;
+ return amdgpu_reset_method;
+ }
if (amdgpu_reset_method != -1)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
return AMD_RESET_METHOD_MODE2;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_ARCTURUS:
- baco_reset = amdgpu_dpm_is_baco_supported(adev);
- break;
- case CHIP_VEGA20:
- if (adev->psp.sos_fw_version >= 0x80067)
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_VEGA20) {
+ if (adev->psp.sos.fw_version >= 0x80067)
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
+ /*
+ * 1. PMFW version > 0x284300: all cases use baco
+ * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
+ */
+ if (ras && adev->ras_enabled &&
+ adev->pm.fw_version <= 0x283400)
+ baco_reset = 0;
+ } else {
baco_reset = amdgpu_dpm_is_baco_supported(adev);
-
- /*
- * 1. PMFW version > 0x284300: all cases use baco
- * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
+ }
+ break;
+ case IP_VERSION(13, 0, 2):
+ /*
+ * 1.connected to cpu: driver issue mode2 reset
+ * 2.discret gpu: driver issue mode1 reset
*/
- if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
- baco_reset = false;
+ if (connected_to_cpu)
+ return AMD_RESET_METHOD_MODE2;
break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
+ /* Use gpu_recovery param to target a reset method.
+ * Enable triggering of GPU reset only if specified
+ * by module parameter.
+ */
+ if (adev->pcie_reset_ctx.in_link_reset)
+ return AMD_RESET_METHOD_LINK;
+ if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
+ return AMD_RESET_METHOD_MODE2;
+ else if (!(adev->flags & AMD_IS_APU))
+ return AMD_RESET_METHOD_MODE1;
+ else
+ return AMD_RESET_METHOD_MODE2;
default:
break;
}
@@ -519,13 +602,36 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
return AMD_RESET_METHOD_MODE1;
}
+static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ /* Will reset for the following suspend abort cases.
+ * 1) S3 suspend aborted in the normal S3 suspend
+ * 2) S3 suspend aborted in performing pm core test.
+ */
+ if (adev->in_s3 && !pm_resume_via_firmware())
+ return true;
+ else
+ return false;
+}
+
static int soc15_asic_reset(struct amdgpu_device *adev)
{
/* original raven doesn't have full asic reset */
- if ((adev->apu_flags & AMD_APU_IS_RAVEN) &&
- !(adev->apu_flags & AMD_APU_IS_RAVEN2))
+ /* On the latest Raven, the GPU reset can be performed
+ * successfully. So now, temporarily enable it for the
+ * S3 suspend abort case.
+ */
+
+ if ((adev->apu_flags & AMD_APU_IS_PICASSO ||
+ !(adev->apu_flags & AMD_APU_IS_RAVEN)) &&
+ soc15_need_reset_on_resume(adev))
+ goto asic_reset;
+
+ if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
+ (adev->apu_flags & AMD_APU_IS_RAVEN2))
return 0;
+asic_reset:
switch (soc15_asic_reset_method(adev)) {
case AMD_RESET_METHOD_PCI:
dev_info(adev->dev, "PCI reset\n");
@@ -536,25 +642,30 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
case AMD_RESET_METHOD_MODE2:
dev_info(adev->dev, "MODE2 reset\n");
return amdgpu_dpm_mode2_reset(adev);
+ case AMD_RESET_METHOD_LINK:
+ dev_info(adev->dev, "Link reset\n");
+ return amdgpu_device_link_reset(adev);
default:
dev_info(adev->dev, "MODE1 reset\n");
- return soc15_asic_mode1_reset(adev);
+ return amdgpu_device_mode1_reset(adev);
}
}
-static bool soc15_supports_baco(struct amdgpu_device *adev)
+static int soc15_supports_baco(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_ARCTURUS:
- return amdgpu_dpm_is_baco_supported(adev);
- case CHIP_VEGA20:
- if (adev->psp.sos_fw_version >= 0x80067)
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_VEGA20) {
+ if (adev->psp.sos.fw_version >= 0x80067)
+ return amdgpu_dpm_is_baco_supported(adev);
+ return 0;
+ } else {
return amdgpu_dpm_is_baco_supported(adev);
- return false;
+ }
+ break;
default:
- return false;
+ return 0;
}
}
@@ -584,41 +695,16 @@ static int soc15_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk
return 0;
}
-static void soc15_pcie_gen3_enable(struct amdgpu_device *adev)
-{
- if (pci_is_root_bus(adev->pdev->bus))
- return;
-
- if (amdgpu_pcie_gen2 == 0)
- return;
-
- if (adev->flags & AMD_IS_APU)
- return;
-
- if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
- CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
- return;
-
- /* todo */
-}
-
static void soc15_program_aspm(struct amdgpu_device *adev)
{
-
- if (amdgpu_aspm == 0)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- /* todo */
+ if (adev->nbio.funcs->program_aspm)
+ adev->nbio.funcs->program_aspm(adev);
}
-static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev,
- bool enable)
-{
- adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
-}
-
-static const struct amdgpu_ip_block_version vega10_common_ip_block =
+const struct amdgpu_ip_block_version vega10_common_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_COMMON,
.major = 2,
@@ -627,32 +713,14 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block =
.funcs = &soc15_common_ip_funcs,
};
-static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
-{
- return adev->nbio.funcs->get_rev_id(adev);
-}
-
static void soc15_reg_base_init(struct amdgpu_device *adev)
{
- int r;
-
/* Set IP register base before any HW register access */
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
- vega10_reg_base_init(adev);
- break;
case CHIP_RENOIR:
- /* It's safe to do ip discovery here for Renior,
- * it doesn't support SRIOV. */
- if (amdgpu_discovery) {
- r = amdgpu_discovery_reg_base_init(adev);
- if (r == 0)
- break;
- DRM_WARN("failed to init reg base from ip discovery table, "
- "fallback to legacy init method\n");
- }
vega10_reg_base_init(adev);
break;
case CHIP_VEGA20:
@@ -661,6 +729,9 @@ static void soc15_reg_base_init(struct amdgpu_device *adev)
case CHIP_ARCTURUS:
arct_reg_base_init(adev);
break;
+ case CHIP_ALDEBARAN:
+ aldebaran_reg_base_init(adev);
+ break;
default:
DRM_ERROR("Unsupported asic type: %d!\n", adev->asic_type);
break;
@@ -670,169 +741,12 @@ static void soc15_reg_base_init(struct amdgpu_device *adev)
void soc15_set_virt_ops(struct amdgpu_device *adev)
{
adev->virt.ops = &xgpu_ai_virt_ops;
-
/* init soc15 reg base early enough so we can
* request request full access for sriov before
* set_ip_blocks. */
soc15_reg_base_init(adev);
}
-int soc15_set_ip_blocks(struct amdgpu_device *adev)
-{
- /* for bare metal case */
- if (!amdgpu_sriov_vf(adev))
- soc15_reg_base_init(adev);
-
- if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
- adev->gmc.xgmi.supported = true;
-
- if (adev->flags & AMD_IS_APU) {
- adev->nbio.funcs = &nbio_v7_0_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg;
- } else if (adev->asic_type == CHIP_VEGA20 ||
- adev->asic_type == CHIP_ARCTURUS) {
- adev->nbio.funcs = &nbio_v7_4_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
- } else {
- adev->nbio.funcs = &nbio_v6_1_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg;
- }
- adev->hdp.funcs = &hdp_v4_0_funcs;
-
- if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
- adev->df.funcs = &df_v3_6_funcs;
- else
- adev->df.funcs = &df_v1_7_funcs;
-
- if (adev->asic_type == CHIP_VEGA20 ||
- adev->asic_type == CHIP_ARCTURUS)
- adev->smuio.funcs = &smuio_v11_0_funcs;
- else
- adev->smuio.funcs = &smuio_v9_0_funcs;
-
- adev->rev_id = soc15_get_rev_id(adev);
-
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
-
- /* For Vega10 SR-IOV, PSP need to be initialized before IH */
- if (amdgpu_sriov_vf(adev)) {
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
- if (adev->asic_type == CHIP_VEGA20)
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- else
- amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
- }
- if (adev->asic_type == CHIP_VEGA20)
- amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
- else
- amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- } else {
- if (adev->asic_type == CHIP_VEGA20)
- amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
- else
- amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
- if (adev->asic_type == CHIP_VEGA20)
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- else
- amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
- }
- }
- amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
- if (is_support_sw_smu(adev)) {
- if (!amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- } else {
- amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
- }
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev))) {
- amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
- }
- break;
- case CHIP_RAVEN:
- amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
- amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
- break;
- case CHIP_ARCTURUS:
- amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
-
- if (amdgpu_sriov_vf(adev)) {
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
- } else {
- amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- }
-
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
- amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-
- if (amdgpu_sriov_vf(adev)) {
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
- } else {
- amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
- }
- if (!amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block);
- break;
- case CHIP_RENOIR:
- amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
- amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
- amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
- amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
-#if defined(CONFIG_DRM_AMD_DC)
- else if (amdgpu_device_has_dc_support(adev))
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
-#endif
- amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
- amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
static bool soc15_need_full_reset(struct amdgpu_device *adev)
{
/* change this when we implement soft reset */
@@ -939,6 +853,10 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
{
u32 sol_reg;
+ if (amdgpu_gmc_need_reset_on_init(adev))
+ return true;
+ if (amdgpu_psp_tos_reload_needed(adev))
+ return true;
/* Just return false for soc15 GPUs. Reset does not seem to
* be necessary.
*/
@@ -978,11 +896,10 @@ static void soc15_pre_asic_init(struct amdgpu_device *adev)
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
- .read_bios_from_rom = &soc15_read_bios_from_rom,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
.reset_method = &soc15_asic_reset_method,
- .set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
.set_vce_clocks = &soc15_set_vce_clocks,
@@ -994,16 +911,16 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
.supports_baco = &soc15_supports_baco,
.pre_asic_init = &soc15_pre_asic_init,
+ .query_video_codecs = &soc15_query_video_codecs,
};
static const struct amdgpu_asic_funcs vega20_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
- .read_bios_from_rom = &soc15_read_bios_from_rom,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
.reset_method = &soc15_asic_reset_method,
- .set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
.set_vce_clocks = &soc15_set_vce_clocks,
@@ -1015,21 +932,46 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
.supports_baco = &soc15_supports_baco,
.pre_asic_init = &soc15_pre_asic_init,
+ .query_video_codecs = &soc15_query_video_codecs,
};
-static int soc15_common_early_init(void *handle)
+static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
{
-#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ .read_disabled_bios = &soc15_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc15_read_register,
+ .reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
+ .get_xclk = &soc15_get_xclk,
+ .set_uvd_clocks = &soc15_set_uvd_clocks,
+ .set_vce_clocks = &soc15_set_vce_clocks,
+ .get_config_memsize = &soc15_get_config_memsize,
+ .need_full_reset = &soc15_need_full_reset,
+ .init_doorbell_index = &aqua_vanjaram_doorbell_index_init,
+ .need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
+ .pre_asic_init = &soc15_pre_asic_init,
+ .query_video_codecs = &soc15_query_video_codecs,
+ .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
+ .get_reg_state = &aqua_vanjaram_get_reg_state,
+};
+
+static int soc15_common_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
- adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
- adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
- adev->pcie_rreg = &soc15_pcie_rreg;
- adev->pcie_wreg = &soc15_pcie_wreg;
- adev->pcie_rreg64 = &soc15_pcie_rreg64;
- adev->pcie_wreg64 = &soc15_pcie_wreg64;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext;
adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
adev->didt_rreg = &soc15_didt_rreg;
@@ -1039,10 +981,13 @@ static int soc15_common_early_init(void *handle)
adev->se_cac_rreg = &soc15_se_cac_rreg;
adev->se_cac_wreg = &soc15_se_cac_wreg;
-
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
adev->external_rev_id = 0xFF;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
+ /* TODO: split the GC and PG flags based on the relevant IP version for which
+ * they are relevant.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
adev->asic_funcs = &soc15_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
@@ -1066,7 +1011,7 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = 0;
adev->external_rev_id = 0x1;
break;
- case CHIP_VEGA12:
+ case IP_VERSION(9, 2, 1):
adev->asic_funcs = &soc15_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
@@ -1089,7 +1034,7 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x14;
break;
- case CHIP_VEGA20:
+ case IP_VERSION(9, 4, 0):
adev->asic_funcs = &vega20_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
@@ -1112,12 +1057,10 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x28;
break;
- case CHIP_RAVEN:
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
adev->asic_funcs = &soc15_asic_funcs;
- if (adev->pdev->device == 0x15dd)
- adev->apu_flags |= AMD_APU_IS_RAVEN;
- if (adev->pdev->device == 0x15d8)
- adev->apu_flags |= AMD_APU_IS_PICASSO;
+
if (adev->rev_id >= 0x8)
adev->apu_flags |= AMD_APU_IS_RAVEN2;
@@ -1151,7 +1094,6 @@ static int soc15_common_early_init(void *handle)
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
- AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
@@ -1160,17 +1102,20 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
- AMD_CG_SUPPORT_SDMA_LS;
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+ /*
+ * MMHUB PG needs to be disabled for Picasso for
+ * stability reasons.
+ */
adev->pg_flags = AMD_PG_SUPPORT_SDMA |
- AMD_PG_SUPPORT_MMHUB |
AMD_PG_SUPPORT_VCN;
} else {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_RLC_LS |
AMD_CG_SUPPORT_GFX_CP_LS |
- AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
@@ -1189,7 +1134,7 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
}
break;
- case CHIP_ARCTURUS:
+ case IP_VERSION(9, 4, 1):
adev->asic_funcs = &vega20_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
@@ -1208,13 +1153,8 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 0x32;
break;
- case CHIP_RENOIR:
+ case IP_VERSION(9, 3, 0):
adev->asic_funcs = &soc15_asic_funcs;
- if ((adev->pdev->device == 0x1636) ||
- (adev->pdev->device == 0x164c))
- adev->apu_flags |= AMD_APU_IS_RENOIR;
- else
- adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
if (adev->apu_flags & AMD_APU_IS_RENOIR)
adev->external_rev_id = adev->rev_id + 0x91;
@@ -1244,6 +1184,38 @@ static int soc15_common_early_init(void *handle)
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_VCN_DPG;
break;
+ case IP_VERSION(9, 4, 2):
+ adev->asic_funcs = &vega20_asic_funcs;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x3c;
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ adev->asic_funcs = &aqua_vanjaram_asic_funcs;
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_IH_CG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ /*TODO: need a new external_rev_id for GC 9.4.4? */
+ adev->external_rev_id = adev->rev_id + 0x46;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -1257,69 +1229,63 @@ static int soc15_common_early_init(void *handle)
return 0;
}
-static int soc15_common_late_init(void *handle)
+static int soc15_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r = 0;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
- if (adev->hdp.funcs->reset_ras_error_count)
- adev->hdp.funcs->reset_ras_error_count(adev);
-
- if (adev->nbio.funcs->ras_late_init)
- r = adev->nbio.funcs->ras_late_init(adev);
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
- return r;
+ return 0;
}
-static int soc15_common_sw_init(void *handle)
+static int soc15_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_add_irq_id(adev);
- adev->df.funcs->sw_init(adev);
+ if (adev->df.funcs &&
+ adev->df.funcs->sw_init)
+ adev->df.funcs->sw_init(adev);
return 0;
}
-static int soc15_common_sw_fini(void *handle)
+static int soc15_common_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_nbio_ras_fini(adev);
- adev->df.funcs->sw_fini(adev);
+ if (adev->df.funcs &&
+ adev->df.funcs->sw_fini)
+ adev->df.funcs->sw_fini(adev);
return 0;
}
-static void soc15_doorbell_range_init(struct amdgpu_device *adev)
+static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev)
{
int i;
- struct amdgpu_ring *ring;
- /* sdma/ih doorbell range are programed by hypervisor */
+ /* sdma doorbell range is programed by hypervisor */
if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
adev->nbio.funcs->sdma_doorbell_range(adev, i,
- ring->use_doorbell, ring->doorbell_index,
+ true, adev->doorbell_index.sdma_engine[i] << 1,
adev->doorbell_index.sdma_doorbell_range);
}
-
- adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
- adev->irq.ih.doorbell_index);
}
}
-static int soc15_common_hw_init(void *handle)
+static int soc15_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- /* enable pcie gen2/3 link */
- soc15_pcie_gen3_enable(adev);
/* enable aspm */
soc15_program_aspm(adev);
/* setup nbio registers */
@@ -1328,70 +1294,78 @@ static int soc15_common_hw_init(void *handle)
* for the purpose of expose those registers
* to process space
*/
- if (adev->nbio.funcs->remap_hdp_registers)
+ if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
- soc15_enable_doorbell_aperture(adev, true);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
/* HW doorbell routing policy: doorbell writing not
* in SDMA/IH/MM/ACV range will be routed to CP. So
- * we need to init SDMA/IH/MM/ACV doorbell range prior
- * to CP ip block init and ring test.
+ * we need to init SDMA doorbell range prior
+ * to CP ip block init and ring test. IH already
+ * happens before CP.
*/
- soc15_doorbell_range_init(adev);
+ soc15_sdma_doorbell_range_init(adev);
return 0;
}
-static int soc15_common_hw_fini(void *handle)
+static int soc15_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because soc15_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
- /* disable the doorbell aperture */
- soc15_enable_doorbell_aperture(adev, false);
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_put_irq(adev);
- if (adev->nbio.ras_if &&
+ /*
+ * For minimal init, late_init is not called, hence RAS irqs are not
+ * enabled.
+ */
+ if ((!amdgpu_sriov_vf(adev)) &&
+ (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
+ adev->nbio.ras_if &&
amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
- if (adev->nbio.funcs->init_ras_controller_interrupt)
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_controller_interrupt)
amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
- if (adev->nbio.funcs->init_ras_err_event_athub_interrupt)
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_err_event_athub_interrupt)
amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
}
return 0;
}
-static int soc15_common_suspend(void *handle)
+static int soc15_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return soc15_common_hw_fini(adev);
+ return soc15_common_hw_fini(ip_block);
}
-static int soc15_common_resume(void *handle)
+static int soc15_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- return soc15_common_hw_init(adev);
+ if (soc15_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n");
+ soc15_asic_reset(adev);
+ }
+ return soc15_common_hw_init(ip_block);
}
-static bool soc15_common_is_idle(void *handle)
+static bool soc15_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int soc15_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int soc15_common_soft_reset(void *handle)
-{
- return 0;
-}
-
static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable)
{
uint32_t def, data;
@@ -1436,18 +1410,18 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable
WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data);
}
-static int soc15_common_set_clockgating_state(void *handle,
+static int soc15_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 2, 0):
+ case IP_VERSION(7, 4, 0):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
@@ -1463,8 +1437,9 @@ static int soc15_common_set_clockgating_state(void *handle,
adev->df.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ case IP_VERSION(2, 5, 0):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
@@ -1476,7 +1451,8 @@ static int soc15_common_set_clockgating_state(void *handle,
soc15_update_drm_light_sleep(adev,
state == AMD_CG_STATE_GATE);
break;
- case CHIP_ARCTURUS:
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
adev->hdp.funcs->update_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -1486,42 +1462,51 @@ static int soc15_common_set_clockgating_state(void *handle,
return 0;
}
-static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
+static void soc15_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
*flags = 0;
- adev->nbio.funcs->get_clockgating_state(adev, flags);
-
- adev->hdp.funcs->get_clock_gating_state(adev, flags);
-
- /* AMD_CG_SUPPORT_DRM_MGCG */
- data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
- if (!(data & 0x01000000))
- *flags |= AMD_CG_SUPPORT_DRM_MGCG;
-
- /* AMD_CG_SUPPORT_DRM_LS */
- data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL));
- if (data & 0x1)
- *flags |= AMD_CG_SUPPORT_DRM_LS;
+ if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state)
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state)
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+
+ if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) {
+ /* AMD_CG_SUPPORT_DRM_MGCG */
+ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
+ if (!(data & 0x01000000))
+ *flags |= AMD_CG_SUPPORT_DRM_MGCG;
+
+ /* AMD_CG_SUPPORT_DRM_LS */
+ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL));
+ if (data & 0x1)
+ *flags |= AMD_CG_SUPPORT_DRM_LS;
+ }
/* AMD_CG_SUPPORT_ROM_MGCG */
- adev->smuio.funcs->get_clock_gating_state(adev, flags);
+ if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state)
+ adev->smuio.funcs->get_clock_gating_state(adev, flags);
- adev->df.funcs->get_clockgating_state(adev, flags);
+ if (adev->df.funcs && adev->df.funcs->get_clockgating_state)
+ adev->df.funcs->get_clockgating_state(adev, flags);
}
-static int soc15_common_set_powergating_state(void *handle,
+static int soc15_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* todo */
return 0;
}
-const struct amd_ip_funcs soc15_common_ip_funcs = {
+static const struct amd_ip_funcs soc15_common_ip_funcs = {
.name = "soc15_common",
.early_init = soc15_common_early_init,
.late_init = soc15_common_late_init,
@@ -1532,8 +1517,6 @@ const struct amd_ip_funcs soc15_common_ip_funcs = {
.suspend = soc15_common_suspend,
.resume = soc15_common_resume,
.is_idle = soc15_common_is_idle,
- .wait_for_idle = soc15_common_wait_for_idle,
- .soft_reset = soc15_common_soft_reset,
.set_clockgating_state = soc15_common_set_clockgating_state,
.set_powergating_state = soc15_common_set_powergating_state,
.get_clockgating_state= soc15_common_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 8f38f047265b..c8ac11a9cdef 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,12 +27,13 @@
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
+#include "amdgpu_reg_state.h"
+
+extern const struct amdgpu_ip_block_version vega10_common_ip_block;
#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6
#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3
-extern const struct amd_ip_funcs soc15_common_ip_funcs;
-
struct soc15_reg_golden {
u32 hwip;
u32 instance;
@@ -49,6 +50,13 @@ struct soc15_reg_rlcg {
u32 reg;
};
+struct soc15_reg {
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+};
+
struct soc15_reg_entry {
uint32_t hwip;
uint32_t inst;
@@ -80,18 +88,27 @@ struct soc15_ras_field_entry {
};
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
+#define SOC15_REG_ENTRY_STR(ip, inst, reg) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, #reg }
#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
+/* Over ride the instance id */
+#define SOC15_REG_ENTRY_OFFSET_INST(entry, inst) \
+ (adev->reg_offset[entry.hwip][inst][entry.seg] + entry.reg_offset)
+
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
#define SOC15_REG_FIELD(reg, field) reg##__##field##_MASK, reg##__##field##__SHIFT
+#define SOC15_REG_FIELD_VAL(val, mask, shift) (((val) & mask) >> shift)
+
+#define SOC15_RAS_REG_FIELD_VAL(val, entry, field) SOC15_REG_FIELD_VAL((val), (entry).field##_count_mask, (entry).field##_count_shift)
+
void soc15_grbm_select(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 queue, u32 vmid);
+ u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id);
void soc15_set_virt_ops(struct amdgpu_device *adev);
-int soc15_set_ip_blocks(struct amdgpu_device *adev);
void soc15_program_register_sequence(struct amdgpu_device *adev,
const struct soc15_reg_golden *registers,
@@ -100,7 +117,14 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
int vega10_reg_base_init(struct amdgpu_device *adev);
int vega20_reg_base_init(struct amdgpu_device *adev);
int arct_reg_base_init(struct amdgpu_device *adev);
+int aldebaran_reg_base_init(struct amdgpu_device *adev);
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size);
void vega10_doorbell_index_init(struct amdgpu_device *adev);
void vega20_doorbell_index_init(struct amdgpu_device *adev);
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index a5c00ab8b021..242b24f73c17 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -24,66 +24,100 @@
#ifndef __SOC15_COMMON_H__
#define __SOC15_COMMON_H__
+/* GET_INST returns the physical instance corresponding to a logical instance */
+#define GET_INST(ip, inst) \
+ (adev->ip_map.logical_to_dev_inst ? \
+ adev->ip_map.logical_to_dev_inst(adev, ip##_HWIP, inst) : inst)
+#define GET_MASK(ip, mask) \
+ (adev->ip_map.logical_to_dev_mask ? \
+ adev->ip_map.logical_to_dev_mask(adev, ip##_HWIP, mask) : mask)
+
/* Register Access Macros */
#define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+#define SOC15_REG_OFFSET1(ip, inst, reg, offset) \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)+(offset))
+
+#define __WREG32_SOC15_RLC__(reg, value, flag, hwip, inst) \
+ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
+ amdgpu_sriov_wreg(adev, reg, value, flag, hwip, inst) : \
+ WREG32(reg, value))
+
+#define __RREG32_SOC15_RLC__(reg, flag, hwip, inst) \
+ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
+ amdgpu_sriov_rreg(adev, reg, flag, hwip, inst) : \
+ RREG32(reg))
#define WREG32_FIELD15(ip, idx, reg, field, val) \
- WREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
- (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
- & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
+ (__RREG32_SOC15_RLC__( \
+ adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
+ 0, ip##_HWIP, idx) & \
+ ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
+ 0, ip##_HWIP, idx)
+
+#define WREG32_FIELD15_PREREG(ip, idx, reg_name, field, val) \
+ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
+ (__RREG32_SOC15_RLC__( \
+ adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
+ 0, ip##_HWIP, idx) & \
+ ~REG_FIELD_MASK(reg_name, field)) | (val) << REG_FIELD_SHIFT(reg_name, field), \
+ 0, ip##_HWIP, idx)
#define RREG32_SOC15(ip, inst, reg) \
- RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
+ 0, ip##_HWIP, inst)
+
+#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
+
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
- RREG32_NO_KIQ(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
+ AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
- RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset)
+ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) + \
+ (offset), 0, ip##_HWIP, inst)
#define WREG32_SOC15(ip, inst, reg, value) \
- WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value)
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \
+ value, 0, ip##_HWIP, inst)
+
+#define WREG32_SOC15_IP(ip, reg, value) \
+ __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
+
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
- WREG32_NO_KIQ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value)
+ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
+ value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
- WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value)
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, \
+ value, 0, ip##_HWIP, inst)
-#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
-({ int ret = 0; \
- do { \
- uint32_t old_ = 0; \
- uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
- uint32_t loop = adev->usec_timeout; \
- ret = 0; \
- while ((tmp_ & (mask)) != (expected_value)) { \
- if (old_ != tmp_) { \
- loop = adev->usec_timeout; \
- old_ = tmp_; \
- } else \
- udelay(1); \
- tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
- loop--; \
- if (!loop) { \
- DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \
- inst, #reg, (unsigned)expected_value, (unsigned)(tmp_ & (mask))); \
- ret = -ETIMEDOUT; \
- break; \
- } \
- } \
- } while (0); \
- ret; \
-})
+#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
+ amdgpu_device_wait_on_rreg(adev, inst, \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)), \
+ #reg, expected_value, mask)
+
+#define SOC15_WAIT_ON_RREG_OFFSET(ip, inst, reg, offset, expected_value, mask) \
+ amdgpu_device_wait_on_rreg(adev, inst, \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg) + (offset)), \
+ #reg, expected_value, mask)
#define WREG32_RLC(reg, value) \
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP, 0)
+
+#define WREG32_RLC_EX(prefix, reg, value, inst) \
do { \
if (amdgpu_sriov_fullaccess(adev)) { \
uint32_t i = 0; \
uint32_t retries = 50000; \
- uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
- uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \
- uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \
+ uint32_t r0 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG0_BASE_IDX] + prefix##SCRATCH_REG0; \
+ uint32_t r1 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG1; \
+ uint32_t spare_int = adev->reg_offset[GC_HWIP][inst][prefix##RLC_SPARE_INT_BASE_IDX] + prefix##RLC_SPARE_INT; \
WREG32(r0, value); \
WREG32(r1, (reg | 0x80000000)); \
WREG32(spare_int, 0x1); \
@@ -100,14 +134,28 @@
} \
} while (0)
+/* shadow the registers in the callback function */
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP, inst)
+
+/* for GC only */
+#define RREG32_RLC(reg) \
+ __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0)
+
+#define WREG32_RLC_NO_KIQ(reg, value, hwip) \
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
+
+#define RREG32_RLC_NO_KIQ(reg, hwip) \
+ __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
+
+#define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \
do { \
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
if (amdgpu_sriov_fullaccess(adev)) { \
- uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
- uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
- uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
- uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \
+ uint32_t r2 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG2; \
+ uint32_t r3 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG3; \
+ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_CNTL_BASE_IDX] + prefix##GRBM_GFX_CNTL; \
+ uint32_t grbm_idx = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_INDEX_BASE_IDX] + prefix##GRBM_GFX_INDEX; \
if (target_reg == grbm_cntl) \
WREG32(r2, value); \
else if (target_reg == grbm_idx) \
@@ -118,18 +166,48 @@
} \
} while (0)
+#define RREG32_SOC15_RLC(ip, inst, reg) \
+ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP, inst)
+
#define WREG32_SOC15_RLC(ip, inst, reg, value) \
do { \
- uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
- WREG32_RLC(target_reg, value); \
+ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
+ __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP, inst); \
+ } while (0)
+
+#define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \
+ do { \
+ uint32_t target_reg = adev->reg_offset[GC_HWIP][inst][reg##_BASE_IDX] + reg;\
+ WREG32_RLC_EX(prefix, target_reg, value, inst); \
} while (0)
#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
- WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
- (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
- & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
+ (__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
+ AMDGPU_REGS_RLC, ip##_HWIP, idx) & \
+ ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
+ AMDGPU_REGS_RLC, ip##_HWIP, idx)
#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
- WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP, inst)
+
+#define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \
+ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP, inst)
+
+/* inst equals to ext for some IPs */
+#define RREG32_SOC15_EXT(ip, inst, reg, ext) \
+ RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
+ + adev->asic_funcs->encode_ext_smn_addressing(ext)) \
+
+#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \
+ WREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
+ + adev->asic_funcs->encode_ext_smn_addressing(ext), \
+ value) \
+
+#define RREG64_MCA(ext, mca_base, idx) \
+ RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8))
+
+#define WREG64_MCA(ext, mca_base, idx, val) \
+ WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 799925d22fc8..cf93fa477674 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -76,6 +76,12 @@
((cond & 0xF) << 24) | \
((type & 0xF) << 28))
+#define CP_PACKETJ_NOP 0x60000000
+#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF)
+#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F)
+#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF)
+#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF)
+
/* Packet 3 types */
#define PACKET3_NOP 0x10
#define PACKET3_SET_BASE 0x11
@@ -87,11 +93,25 @@
#define PACKET3_DISPATCH_INDIRECT 0x16
#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
#define PACKET3_COND_EXEC 0x22
#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24)
#define PACKET3_DRAW_INDIRECT 0x24
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
#define PACKET3_INDEX_BASE 0x26
@@ -126,6 +146,28 @@
* 1 - pfp
* 2 - ce
*/
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_MEM_SEMAPHORE 0x39
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -154,6 +196,33 @@
/* 0 - me
* 1 - pfp
*/
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -162,7 +231,64 @@
* 2 - Bypass
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
+#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_COND_WRITE 0x45
#define PACKET3_EVENT_WRITE 0x46
@@ -174,6 +300,15 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
#define PACKET3_RELEASE_MEM 0x49
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
@@ -184,6 +319,7 @@
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
#define EOP_TC_NC_ACTION_EN (1 << 19)
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard
@@ -278,6 +414,13 @@
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
#define PACKET3_REWIND 0x59
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0)
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F
#define PACKET3_LOAD_CONFIG_REG 0x60
@@ -292,12 +435,16 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
#define PACKET3_SCRATCH_RAM_READ 0x7E
#define PACKET3_LOAD_CONST_RAM 0x80
@@ -405,6 +552,15 @@
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+#define PACKET3_RUN_CLEANER_SHADER_9_0 0xD7
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
+#define PACKET3_RUN_CLEANER_SHADER 0xD2
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
#define VCE_CMD_NO_OP 0x00000000
#define VCE_CMD_END 0x00000001
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
new file mode 100644
index 000000000000..ad36c96478a8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -0,0 +1,1021 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "mp/mp_13_0_0_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "mxgpu_nv.h"
+
+static const struct amd_ip_funcs soc21_common_ip_funcs;
+
+/* SOC21 */
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = vcn_4_0_0_video_codecs_encode_array_vcn0,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn1),
+ .codec_array = vcn_4_0_0_video_codecs_encode_array_vcn1,
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn0,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn1),
+ .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1,
+};
+
+/* SRIOV SOC21, not const since data is controlled by host */
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+};
+
+static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 2):
+ case IP_VERSION(4, 0, 4):
+ case IP_VERSION(4, 0, 5):
+ if (amdgpu_sriov_vf(adev)) {
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+ !amdgpu_sriov_is_av1_support(adev)) {
+ if (encode)
+ *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn1;
+ else
+ *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn0;
+ }
+ } else {
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)) {
+ if (encode)
+ *codecs = &vcn_4_0_0_video_codecs_encode_vcn1;
+ else
+ *codecs = &vcn_4_0_0_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
+ }
+ }
+ return 0;
+ case IP_VERSION(4, 0, 6):
+ if (encode)
+ *codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static u32 soc21_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void soc21_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ WREG32(data, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
+static u32 soc21_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+static u32 soc21_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+
+void soc21_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl);
+}
+
+static bool soc21_read_disabled_bios(struct amdgpu_device *adev)
+{
+ /* todo */
+ return false;
+}
+
+static struct soc15_allowed_register_entry soc21_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)},
+};
+
+static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t soc21_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return soc21_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) && adev->gfx.config.gb_addr_config)
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset);
+ }
+}
+
+static int soc21_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(soc21_allowed_read_registers); i++) {
+ en = &soc21_allowed_read_registers[i];
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
+ continue;
+
+ *value = soc21_get_register_value(adev,
+ soc21_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+#if 0
+static int soc21_asic_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
+
+ if (ret)
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+ amdgpu_device_load_pci_state(adev->pdev);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return ret;
+}
+#endif
+
+static enum amd_reset_method
+soc21_asic_reset_method(struct amdgpu_device *adev)
+{
+ if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ return AMD_RESET_METHOD_MODE1;
+ case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
+ case IP_VERSION(14, 0, 5):
+ return AMD_RESET_METHOD_MODE2;
+ default:
+ if (amdgpu_dpm_is_baco_supported(adev))
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+ }
+}
+
+static int soc21_asic_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (soc21_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI:
+ dev_info(adev->dev, "PCI reset\n");
+ ret = amdgpu_device_pci_reset(adev);
+ break;
+ case AMD_RESET_METHOD_BACO:
+ dev_info(adev->dev, "BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ ret = amdgpu_dpm_mode2_reset(adev);
+ break;
+ default:
+ dev_info(adev->dev, "MODE1 reset\n");
+ ret = amdgpu_device_mode1_reset(adev);
+ break;
+ }
+
+ return ret;
+}
+
+static int soc21_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ /* todo */
+ return 0;
+}
+
+static int soc21_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ /* todo */
+ return 0;
+}
+
+static void soc21_program_aspm(struct amdgpu_device *adev)
+{
+ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (adev->nbio.funcs->program_aspm)
+ adev->nbio.funcs->program_aspm(adev);
+}
+
+const struct amdgpu_ip_block_version soc21_common_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc21_common_ip_funcs,
+};
+
+static bool soc21_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ default:
+ return true;
+ }
+}
+
+static bool soc21_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+
+ return false;
+}
+
+static void soc21_init_doorbell_index(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.vpe_ring = AMDGPU_NAVI10_DOORBELL64_VPE;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
+static void soc21_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg)
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+ return 0;
+}
+
+static const struct amdgpu_asic_funcs soc21_asic_funcs = {
+ .read_disabled_bios = &soc21_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc21_read_register,
+ .reset = &soc21_asic_reset,
+ .reset_method = &soc21_asic_reset_method,
+ .get_xclk = &soc21_get_xclk,
+ .set_uvd_clocks = &soc21_set_uvd_clocks,
+ .set_vce_clocks = &soc21_set_vce_clocks,
+ .get_config_memsize = &soc21_get_config_memsize,
+ .init_doorbell_index = &soc21_init_doorbell_index,
+ .need_full_reset = &soc21_need_full_reset,
+ .need_reset_on_init = &soc21_need_reset_on_init,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
+ .pre_asic_init = &soc21_pre_asic_init,
+ .query_video_codecs = &soc21_query_video_codecs,
+ .update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
+};
+
+static int soc21_common_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->set_reg_remap(adev);
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
+
+ /* TODO: will add them during VCN v2 implementation */
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+
+ adev->didt_rreg = &soc21_didt_rreg;
+ adev->didt_wreg = &soc21_didt_wreg;
+
+ adev->asic_funcs = &soc21_asic_funcs;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ adev->external_rev_id = 0xff;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+#if 0
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+#endif
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update
+ break;
+ case IP_VERSION(11, 0, 2):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x10;
+ break;
+ case IP_VERSION(11, 0, 1):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
+ case IP_VERSION(11, 0, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x20;
+ break;
+ case IP_VERSION(11, 0, 4):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x80;
+ break;
+ case IP_VERSION(11, 5, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x1;
+ else
+ adev->external_rev_id = adev->rev_id + 0x10;
+ break;
+ case IP_VERSION(11, 5, 1):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0xc1;
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x40;
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+static int soc21_common_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_get_irq(adev);
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+ !amdgpu_sriov_is_av1_support(adev)) {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+ sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1));
+ } else {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+ sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0));
+ }
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ /* don't need to fail gpu late init
+ * if enabling athub_err_event interrupt failed
+ * nbio v4_3 only support fatal error hanlding
+ * just enable the interrupt directly */
+ amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc21_common_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
+ return 0;
+}
+
+static int soc21_common_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* enable aspm */
+ soc21_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->remap_hdp_registers(adev);
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc21_common_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because soc21_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_put_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ return 0;
+}
+
+static int soc21_common_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return soc21_common_hw_fini(ip_block);
+}
+
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ u32 sol_reg1, sol_reg2;
+
+ /* Will reset for the following suspend abort cases.
+ * 1) Only reset dGPU side.
+ * 2) S3 suspend got aborted and TOS is active.
+ * As for dGPU suspend abort cases the SOL value
+ * will be kept as zero at this resume point.
+ */
+ if (!(adev->flags & AMD_IS_APU) && adev->in_s3) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ msleep(100);
+ sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return (sol_reg1 != sol_reg2);
+ }
+
+ return false;
+}
+
+static int soc21_common_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (soc21_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend aborted, resetting...");
+ soc21_asic_reset(adev);
+ }
+
+ return soc21_common_hw_init(ip_block);
+}
+
+static bool soc21_common_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
+ case IP_VERSION(7, 7, 0):
+ case IP_VERSION(7, 7, 1):
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int soc21_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 2):
+ adev->lsdma.funcs->update_memory_power_gating(adev,
+ state == AMD_PG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void soc21_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+}
+
+static const struct amd_ip_funcs soc21_common_ip_funcs = {
+ .name = "soc21_common",
+ .early_init = soc21_common_early_init,
+ .late_init = soc21_common_late_init,
+ .sw_init = soc21_common_sw_init,
+ .hw_init = soc21_common_hw_init,
+ .hw_fini = soc21_common_hw_fini,
+ .suspend = soc21_common_suspend,
+ .resume = soc21_common_resume,
+ .is_idle = soc21_common_is_idle,
+ .set_clockgating_state = soc21_common_set_clockgating_state,
+ .set_powergating_state = soc21_common_set_powergating_state,
+ .get_clockgating_state = soc21_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.h b/drivers/gpu/drm/amd/amdgpu/soc21.h
new file mode 100644
index 000000000000..4c8067af1b65
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC21_H__
+#define __SOC21_H__
+
+extern const struct amdgpu_ip_block_version soc21_common_ip_block;
+
+void soc21_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
new file mode 100644
index 000000000000..972b449ab89f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -0,0 +1,601 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "mp/mp_14_0_2_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "soc24.h"
+#include "mxgpu_nv.h"
+
+static const struct amd_ip_funcs soc24_common_ip_funcs;
+
+static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = vcn_5_0_0_video_codecs_encode_array_vcn0,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_0_0_video_codecs_decode_array_vcn0,
+};
+
+static int soc24_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(5, 0, 0):
+ if (encode)
+ *codecs = &vcn_5_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_0_video_codecs_decode_vcn0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static u32 soc24_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+static u32 soc24_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+void soc24_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl);
+}
+
+static struct soc15_allowed_register_entry soc24_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)},
+};
+
+static uint32_t soc24_read_indexed_register(struct amdgpu_device *adev,
+ u32 se_num,
+ u32 sh_num,
+ u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t soc24_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return soc24_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) &&
+ adev->gfx.config.gb_addr_config)
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset);
+ }
+}
+
+static int soc24_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(soc24_allowed_read_registers); i++) {
+ en = &soc24_allowed_read_registers[i];
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
+ continue;
+
+ *value = soc24_get_register_value(adev,
+ soc24_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static enum amd_reset_method
+soc24_asic_reset_method(struct amdgpu_device *adev)
+{
+ if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev,
+ "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ return AMD_RESET_METHOD_MODE1;
+ default:
+ if (amdgpu_dpm_is_baco_supported(adev))
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+ }
+}
+
+static int soc24_asic_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (soc24_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI:
+ dev_info(adev->dev, "PCI reset\n");
+ ret = amdgpu_device_pci_reset(adev);
+ break;
+ case AMD_RESET_METHOD_BACO:
+ dev_info(adev->dev, "BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ ret = amdgpu_dpm_mode2_reset(adev);
+ break;
+ default:
+ dev_info(adev->dev, "MODE1 reset\n");
+ ret = amdgpu_device_mode1_reset(adev);
+ break;
+ }
+
+ return ret;
+}
+
+static void soc24_program_aspm(struct amdgpu_device *adev)
+{
+ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->nbio.funcs->program_aspm))
+ adev->nbio.funcs->program_aspm(adev);
+}
+
+const struct amdgpu_ip_block_version soc24_common_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc24_common_ip_funcs,
+};
+
+static bool soc24_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ default:
+ return true;
+ }
+}
+
+static bool soc24_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+
+ return false;
+}
+
+static uint64_t soc24_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ /* TODO
+ * dummy implement for pcie_replay_count sysfs interface
+ * */
+ return 0;
+}
+
+static void soc24_init_doorbell_index(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
+static void soc24_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static int soc24_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg)
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+ return 0;
+}
+
+static const struct amdgpu_asic_funcs soc24_asic_funcs = {
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc24_read_register,
+ .reset = &soc24_asic_reset,
+ .reset_method = &soc24_asic_reset_method,
+ .get_xclk = &soc24_get_xclk,
+ .get_config_memsize = &soc24_get_config_memsize,
+ .init_doorbell_index = &soc24_init_doorbell_index,
+ .need_full_reset = &soc24_need_full_reset,
+ .need_reset_on_init = &soc24_need_reset_on_init,
+ .get_pcie_replay_count = &soc24_get_pcie_replay_count,
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
+ .pre_asic_init = &soc24_pre_asic_init,
+ .query_video_codecs = &soc24_query_video_codecs,
+ .update_umd_stable_pstate = &soc24_update_umd_stable_pstate,
+};
+
+static int soc24_common_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->set_reg_remap(adev);
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+ adev->didt_rreg = NULL;
+ adev->didt_wreg = NULL;
+
+ adev->asic_funcs = &soc24_asic_funcs;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ adev->external_rev_id = 0xff;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_MC_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x40;
+ break;
+ case IP_VERSION(12, 0, 1):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_MC_LS;
+
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+static int soc24_common_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_get_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ /* don't need to fail gpu late init
+ * if enabling athub_err_event interrupt failed
+ * nbif v6_3_1 only support fatal error hanlding
+ * just enable the interrupt directly
+ */
+ amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc24_common_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
+ return 0;
+}
+
+static int soc24_common_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* enable aspm */
+ soc24_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers)
+ adev->nbio.funcs->remap_hdp_registers(adev);
+
+ if (adev->df.funcs->hw_init)
+ adev->df.funcs->hw_init(adev);
+
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because soc21_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_put_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ return 0;
+}
+
+static int soc24_common_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return soc24_common_hw_fini(ip_block);
+}
+
+static int soc24_common_resume(struct amdgpu_ip_block *ip_block)
+{
+ return soc24_common_hw_init(ip_block);
+}
+
+static bool soc24_common_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int soc24_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(6, 3, 1):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int soc24_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ adev->lsdma.funcs->update_memory_power_gating(adev,
+ state == AMD_PG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void soc24_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+
+ return;
+}
+
+static const struct amd_ip_funcs soc24_common_ip_funcs = {
+ .name = "soc24_common",
+ .early_init = soc24_common_early_init,
+ .late_init = soc24_common_late_init,
+ .sw_init = soc24_common_sw_init,
+ .hw_init = soc24_common_hw_init,
+ .hw_fini = soc24_common_hw_fini,
+ .suspend = soc24_common_suspend,
+ .resume = soc24_common_resume,
+ .is_idle = soc24_common_is_idle,
+ .set_clockgating_state = soc24_common_set_clockgating_state,
+ .set_powergating_state = soc24_common_set_powergating_state,
+ .get_clockgating_state = soc24_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.h b/drivers/gpu/drm/amd/amdgpu/soc24.h
new file mode 100644
index 000000000000..fa7e442e0b62
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC24_H__
+#define __SOC24_H__
+
+extern const struct amdgpu_ip_block_version soc24_common_ip_block;
+
+void soc24_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 745ed0fba1ed..8a3f326474e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -30,17 +30,22 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+/* invalid node instance value */
+#define TA_RAS_INV_NODE 0xffff
+
/* RAS related enumerations */
/**********************************************************/
enum ras_command {
TA_RAS_COMMAND__ENABLE_FEATURES = 0,
TA_RAS_COMMAND__DISABLE_FEATURES,
TA_RAS_COMMAND__TRIGGER_ERROR,
+ TA_RAS_COMMAND__QUERY_BLOCK_INFO,
+ TA_RAS_COMMAND__QUERY_SUB_BLOCK_INFO,
+ TA_RAS_COMMAND__QUERY_ADDRESS,
};
-enum ta_ras_status
-{
- TA_RAS_STATUS__SUCCESS = 0x00,
+enum ta_ras_status {
+ TA_RAS_STATUS__SUCCESS = 0x0000,
TA_RAS_STATUS__RESET_NEEDED = 0xA001,
TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0xA002,
TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0xA003,
@@ -55,7 +60,18 @@ enum ta_ras_status
TA_RAS_STATUS__ERROR_GET_DEV_INFO = 0xA00C,
TA_RAS_STATUS__ERROR_UNSUPPORTED_DEV = 0xA00D,
TA_RAS_STATUS__ERROR_NOT_INITIALIZED = 0xA00E,
- TA_RAS_STATUS__ERROR_TEE_INTERNAL = 0xA00F
+ TA_RAS_STATUS__ERROR_TEE_INTERNAL = 0xA00F,
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_FUNCTION = 0xA010,
+ TA_RAS_STATUS__ERROR_SYS_DRV_REG_ACCESS = 0xA011,
+ TA_RAS_STATUS__ERROR_RAS_READ_WRITE = 0xA012,
+ TA_RAS_STATUS__ERROR_NULL_PTR = 0xA013,
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_IP = 0xA014,
+ TA_RAS_STATUS__ERROR_PCS_STATE_QUIET = 0xA015,
+ TA_RAS_STATUS__ERROR_PCS_STATE_ERROR = 0xA016,
+ TA_RAS_STATUS__ERROR_PCS_STATE_HANG = 0xA017,
+ TA_RAS_STATUS__ERROR_PCS_STATE_UNKNOWN = 0xA018,
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ = 0xA019,
+ TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED = 0xA01A
};
enum ta_ras_block {
@@ -73,9 +89,23 @@ enum ta_ras_block {
TA_RAS_BLOCK__MP0,
TA_RAS_BLOCK__MP1,
TA_RAS_BLOCK__FUSE,
+ TA_RAS_BLOCK__MCA,
+ TA_RAS_BLOCK__VCN,
+ TA_RAS_BLOCK__JPEG,
+ TA_RAS_BLOCK__IH,
+ TA_RAS_BLOCK__MPIO,
+ TA_RAS_BLOCK__MMSCH,
TA_NUM_BLOCK_MAX
};
+enum ta_ras_mca_block {
+ TA_RAS_MCA_BLOCK__MP0 = 0,
+ TA_RAS_MCA_BLOCK__MP1 = 1,
+ TA_RAS_MCA_BLOCK__MPIO = 2,
+ TA_RAS_MCA_BLOCK__IOHC = 3,
+ TA_MCA_NUM_BLOCK_MAX
+};
+
enum ta_ras_error_type {
TA_RAS_ERROR__NONE = 0,
TA_RAS_ERROR__PARITY = 1,
@@ -84,6 +114,19 @@ enum ta_ras_error_type {
TA_RAS_ERROR__POISON = 8,
};
+enum ta_ras_address_type {
+ TA_RAS_MCA_TO_PA,
+ TA_RAS_PA_TO_MCA,
+};
+
+enum ta_ras_nps_mode {
+ TA_RAS_UNKNOWN_MODE = 0,
+ TA_RAS_NPS1_MODE = 1,
+ TA_RAS_NPS2_MODE = 2,
+ TA_RAS_NPS4_MODE = 4,
+ TA_RAS_NPS8_MODE = 8,
+};
+
/* Input/output structures for RAS commands */
/**********************************************************/
@@ -105,28 +148,65 @@ struct ta_ras_trigger_error_input {
uint64_t value; // method if error injection. i.e persistent, coherent etc.
};
-struct ta_ras_output_flags
-{
- uint8_t ras_init_success_flag;
- uint8_t err_inject_switch_disable_flag;
- uint8_t reg_access_failure_flag;
+struct ta_ras_init_flags {
+ uint8_t poison_mode_en;
+ uint8_t dgpu_mode;
+ uint16_t xcc_mask;
+ uint8_t channel_dis_num;
+ uint8_t nps_mode;
+ uint32_t active_umc_mask;
+};
+
+struct ta_ras_mca_addr {
+ uint64_t err_addr;
+ uint32_t ch_inst;
+ uint32_t umc_inst;
+ uint32_t node_inst;
+ uint32_t socket_id;
+};
+
+struct ta_ras_phy_addr {
+ uint64_t pa;
+ uint32_t bank;
+ uint32_t channel_idx;
+};
+
+struct ta_ras_query_address_input {
+ enum ta_ras_address_type addr_type;
+ struct ta_ras_mca_addr ma;
+ struct ta_ras_phy_addr pa;
+};
+
+struct ta_ras_output_flags {
+ uint8_t ras_init_success_flag;
+ uint8_t err_inject_switch_disable_flag;
+ uint8_t reg_access_failure_flag;
+};
+
+struct ta_ras_query_address_output {
+ /* don't use the flags here */
+ struct ta_ras_output_flags flags;
+ struct ta_ras_mca_addr ma;
+ struct ta_ras_phy_addr pa;
};
/* Common input structure for RAS callbacks */
/**********************************************************/
union ta_ras_cmd_input {
+ struct ta_ras_init_flags init_flags;
struct ta_ras_enable_features_input enable_features;
struct ta_ras_disable_features_input disable_features;
struct ta_ras_trigger_error_input trigger_error;
+ struct ta_ras_query_address_input address;
- uint32_t reserve_pad[256];
+ uint32_t reserve_pad[256];
};
-union ta_ras_cmd_output
-{
- struct ta_ras_output_flags flags;
+union ta_ras_cmd_output {
+ struct ta_ras_output_flags flags;
+ struct ta_ras_query_address_output address;
- uint32_t reserve_pad[256];
+ uint32_t reserve_pad[256];
};
/* Shared Memory structures */
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
index 5039375bb1d4..9ec2e03d41c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
@@ -31,10 +31,12 @@
* Secure Display Command ID
*/
enum ta_securedisplay_command {
- /* Query whether TA is responding used only for validation purpose */
+ /* Query whether TA is responding. It is used only for validation purpose */
TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1,
/* Send region of Interest and CRC value to I2C */
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2,
+ /* V2 to send multiple regions of Interest and CRC value to I2C */
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2 = 3,
/* Maximum Command ID */
TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF,
};
@@ -50,14 +52,15 @@ enum ta_securedisplay_status {
TA_SECUREDISPLAY_STATUS__I2C_WRITE_ERROR = 0x04, /* Fail to Write to I2C */
TA_SECUREDISPLAY_STATUS__READ_DIO_SCRATCH_ERROR = 0x05, /*Fail Read DIO Scratch Register*/
TA_SECUREDISPLAY_STATUS__READ_CRC_ERROR = 0x06, /* Fail to Read CRC*/
+ TA_SECUREDISPLAY_STATUS__I2C_INIT_ERROR = 0x07, /* Failed to initialize I2C */
TA_SECUREDISPLAY_STATUS__MAX = 0x7FFFFFFF,/* Maximum Value for status*/
};
-/** @enum ta_securedisplay_max_phy
+/** @enum ta_securedisplay_phy_ID
* Physical ID number to use for reading corresponding DIO Scratch register for ROI
*/
-enum ta_securedisplay_max_phy {
+enum ta_securedisplay_phy_ID {
TA_SECUREDISPLAY_PHY0 = 0,
TA_SECUREDISPLAY_PHY1 = 1,
TA_SECUREDISPLAY_PHY2 = 2,
@@ -82,6 +85,8 @@ enum ta_securedisplay_ta_query_cmd_ret {
enum ta_securedisplay_buffer_size {
/* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */
TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15,
+ /* 16 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) + 1 byte(roi_idx) */
+ TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE = 16,
};
/** Input/output structures for Secure Display commands */
@@ -94,7 +99,15 @@ enum ta_securedisplay_buffer_size {
* Physical ID to determine which DIO scratch register should be used to get ROI
*/
struct ta_securedisplay_send_roi_crc_input {
- uint32_t phy_id; /* Physical ID */
+ /* Physical ID */
+ uint32_t phy_id;
+};
+
+struct ta_securedisplay_send_roi_crc_v2_input {
+ /* Physical ID */
+ uint32_t phy_id;
+ /* Region of interest index */
+ uint8_t roi_idx;
};
/** @union ta_securedisplay_cmd_input
@@ -103,6 +116,8 @@ struct ta_securedisplay_send_roi_crc_input {
union ta_securedisplay_cmd_input {
/* send ROI and CRC input buffer format */
struct ta_securedisplay_send_roi_crc_input send_roi_crc;
+ /* send ROI and CRC input buffer format, v2 adds a ROI index */
+ struct ta_securedisplay_send_roi_crc_v2_input send_roi_crc_v2;
uint32_t reserved[4];
};
@@ -127,6 +142,10 @@ struct ta_securedisplay_send_roi_crc_output {
uint8_t reserved;
};
+struct ta_securedisplay_send_roi_crc_v2_output {
+ uint8_t i2c_buf[TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE]; /* I2C buffer */
+};
+
/** @union ta_securedisplay_cmd_output
* Output buffer
*/
@@ -135,19 +154,21 @@ union ta_securedisplay_cmd_output {
struct ta_securedisplay_query_ta_output query_ta;
/* Send ROI CRC output buffer format used only for validation purpose */
struct ta_securedisplay_send_roi_crc_output send_roi_crc;
+ /* Send ROI CRC output buffer format used only for validation purpose */
+ struct ta_securedisplay_send_roi_crc_v2_output send_roi_crc_v2;
uint32_t reserved[4];
};
-/** @struct securedisplay_cmd
- * Secure Display Command which is shared buffer memory
- */
-struct securedisplay_cmd {
- uint32_t cmd_id; /* +0 Bytes Command ID */
- enum ta_securedisplay_status status; /* +4 Bytes Status of Secure Display TA */
- uint32_t reserved[2]; /* +8 Bytes Reserved */
- union ta_securedisplay_cmd_input securedisplay_in_message; /* +16 Bytes Input Buffer */
- union ta_securedisplay_cmd_output securedisplay_out_message;/* +32 Bytes Output Buffer */
- /**@note Total 48 Bytes */
+/** @struct ta_securedisplay_cmd
+* Secure display command which is shared buffer memory
+*/
+struct ta_securedisplay_cmd {
+ uint32_t cmd_id; /**< +0 Bytes Command ID */
+ enum ta_securedisplay_status status; /**< +4 Bytes Status code returned by the secure display TA */
+ uint32_t reserved[2]; /**< +8 Bytes Reserved */
+ union ta_securedisplay_cmd_input securedisplay_in_message; /**< +16 Bytes Command input buffer */
+ union ta_securedisplay_cmd_output securedisplay_out_message; /**< +32 Bytes Command output buffer */
+ /**@note Total 48 Bytes */
};
#endif //_TA_SECUREDISPLAY_IF_H
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
index ac2c27b7630c..d5748032674e 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,7 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-
#ifndef _TA_XGMI_IF_H
#define _TA_XGMI_IF_H
@@ -28,19 +27,31 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+#define EXTEND_PEER_LINK_INFO_CMD_FLAG 1
+
enum ta_command_xgmi {
+ /* Initialize the Context and Session Topology */
TA_COMMAND_XGMI__INITIALIZE = 0x00,
+ /* Gets the current GPU's node ID */
TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
+ /* Gets the current GPU's hive ID */
TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
- TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
- TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04
+ /* Gets the Peer's topology Information */
+ TA_COMMAND_XGMI__GET_TOPOLOGY_INFO = 0x03,
+ /* Sets the Peer's topology Information */
+ TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04,
+ /* Gets the total links between adjacent peer dies in hive */
+ TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B,
+ /* Gets the total links and connected port numbers between adjacent peer dies in hive */
+ TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS = 0x0C
};
/* XGMI related enumerations */
/**********************************************************/;
-enum ta_xgmi_connected_nodes {
- TA_XGMI__MAX_CONNECTED_NODES = 64
-};
+enum { TA_XGMI__MAX_CONNECTED_NODES = 64 };
+enum { TA_XGMI__MAX_INTERNAL_STATE = 32 };
+enum { TA_XGMI__MAX_INTERNAL_STATE_BUFFER = 128 };
+enum { TA_XGMI__MAX_PORT_NUM = 8 };
enum ta_xgmi_status {
TA_XGMI_STATUS__SUCCESS = 0x00,
@@ -75,6 +86,23 @@ struct ta_xgmi_node_info {
enum ta_xgmi_assigned_sdma_engine sdma_engine;
};
+struct ta_xgmi_peer_link_info {
+ uint64_t node_id;
+ uint8_t num_links;
+};
+
+struct xgmi_connected_port_num {
+ uint8_t dst_xgmi_port_num;
+ uint8_t src_xgmi_port_num;
+};
+
+/* support both the port num and num_links */
+struct ta_xgmi_extend_peer_link_info {
+ uint64_t node_id;
+ uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
+};
+
struct ta_xgmi_cmd_initialize_output {
uint32_t status;
};
@@ -102,6 +130,16 @@ struct ta_xgmi_cmd_set_topology_info_input {
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
+/* support XGMI TA w/ and w/o port_num both so two similar structs defined */
+struct ta_xgmi_cmd_get_peer_link_info {
+ uint32_t num_nodes;
+ struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
+
+struct ta_xgmi_cmd_get_extend_peer_link_info {
+ uint32_t num_nodes;
+ struct ta_xgmi_extend_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
/**********************************************************/
/* Common input structure for XGMI callbacks */
union ta_xgmi_cmd_input {
@@ -115,14 +153,23 @@ union ta_xgmi_cmd_output {
struct ta_xgmi_cmd_get_node_id_output get_node_id;
struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
+ struct ta_xgmi_cmd_get_peer_link_info get_link_info;
+ struct ta_xgmi_cmd_get_extend_peer_link_info get_extend_link_info;
};
-/**********************************************************/
struct ta_xgmi_shared_memory {
uint32_t cmd_id;
uint32_t resp_id;
enum ta_xgmi_status xgmi_status;
- uint32_t reserved;
+
+ /* if the number of xgmi link record is more than 128, driver will set the
+ * flag 0 to get the first 128 of the link records and will set to 1, to get
+ * the second set
+ */
+ uint8_t flag_extend_link_record;
+ /* bit0: port_num info support flag for GET_EXTEND_PEER_LINKS commmand */
+ uint8_t caps_flag;
+ uint8_t reserved[2];
union ta_xgmi_cmd_input xgmi_in_message;
union ta_xgmi_cmd_output xgmi_out_message;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 249fcbee7871..ee8038df17e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -159,6 +159,9 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
tonga_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -196,6 +199,9 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -219,6 +225,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
@@ -277,9 +289,9 @@ static void tonga_ih_set_rptr(struct amdgpu_device *adev,
}
}
-static int tonga_ih_early_init(void *handle)
+static int tonga_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -291,15 +303,19 @@ static int tonga_ih_early_init(void *handle)
return 0;
}
-static int tonga_ih_sw_init(void *handle)
+static int tonga_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih;
@@ -308,21 +324,20 @@ static int tonga_ih_sw_init(void *handle)
return r;
}
-static int tonga_ih_sw_fini(void *handle)
+static int tonga_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
return 0;
}
-static int tonga_ih_hw_init(void *handle)
+static int tonga_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = tonga_ih_irq_init(adev);
if (r)
@@ -331,32 +346,26 @@ static int tonga_ih_hw_init(void *handle)
return 0;
}
-static int tonga_ih_hw_fini(void *handle)
+static int tonga_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- tonga_ih_irq_disable(adev);
+ tonga_ih_irq_disable(ip_block->adev);
return 0;
}
-static int tonga_ih_suspend(void *handle)
+static int tonga_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return tonga_ih_hw_fini(adev);
+ return tonga_ih_hw_fini(ip_block);
}
-static int tonga_ih_resume(void *handle)
+static int tonga_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return tonga_ih_hw_init(adev);
+ return tonga_ih_hw_init(ip_block);
}
-static bool tonga_ih_is_idle(void *handle)
+static bool tonga_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -365,11 +374,11 @@ static bool tonga_ih_is_idle(void *handle)
return true;
}
-static int tonga_ih_wait_for_idle(void *handle)
+static int tonga_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -381,9 +390,9 @@ static int tonga_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static bool tonga_ih_check_soft_reset(void *handle)
+static bool tonga_ih_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -400,29 +409,27 @@ static bool tonga_ih_check_soft_reset(void *handle)
}
}
-static int tonga_ih_pre_soft_reset(void *handle)
+static int tonga_ih_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->irq.srbm_soft_reset)
+ if (!ip_block->adev->irq.srbm_soft_reset)
return 0;
- return tonga_ih_hw_fini(adev);
+ return tonga_ih_hw_fini(ip_block);
}
-static int tonga_ih_post_soft_reset(void *handle)
+static int tonga_ih_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->irq.srbm_soft_reset)
return 0;
- return tonga_ih_hw_init(adev);
+ return tonga_ih_hw_init(ip_block);
}
-static int tonga_ih_soft_reset(void *handle)
+static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->irq.srbm_soft_reset)
@@ -451,13 +458,13 @@ static int tonga_ih_soft_reset(void *handle)
return 0;
}
-static int tonga_ih_set_clockgating_state(void *handle,
+static int tonga_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int tonga_ih_set_powergating_state(void *handle,
+static int tonga_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -466,7 +473,6 @@ static int tonga_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs tonga_ih_ip_funcs = {
.name = "tonga_ih",
.early_init = tonga_ih_early_init,
- .late_init = NULL,
.sw_init = tonga_ih_sw_init,
.sw_fini = tonga_ih_sw_fini,
.hw_init = tonga_ih_hw_init,
@@ -494,8 +500,7 @@ static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &tonga_ih_funcs;
}
-const struct amdgpu_ip_block_version tonga_ih_ip_block =
-{
+const struct amdgpu_ip_block_version tonga_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 3,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
new file mode 100644
index 000000000000..0f5b1719fda5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -0,0 +1,742 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v12_0.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_12_0_0_offset.h"
+#include "umc/umc_12_0_0_sh_mask.h"
+#include "mp/mp_13_0_6_sh_mask.h"
+
+#define MAX_ECC_NUM_PER_RETIREMENT 32
+#define DELAYED_TIME_FOR_GPU_RESET 1000 //ms
+
+static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev,
+ uint32_t node_inst,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ uint64_t cross_node_offset = (node_inst == 0) ? 0 : UMC_V12_0_CROSS_NODE_OFFSET;
+
+ umc_inst = index / 4;
+ ch_inst = index % 4;
+
+ return adev->umc.channel_offs * ch_inst + UMC_V12_0_INST_DIST * umc_inst +
+ UMC_V12_0_NODE_DIST * node_inst + cross_node_offset;
+}
+
+static int umc_v12_0_reset_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint64_t odecc_err_cnt_addr;
+ uint64_t umc_reg_offset =
+ get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ odecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccErrCnt);
+
+ /* clear error count */
+ WREG32_PCIE_EXT((odecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V12_0_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_reset_error_count_per_channel, NULL);
+}
+
+bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+ dev_dbg(adev->dev,
+ "MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n",
+ mc_umc_status,
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC)
+ );
+
+ return (amdgpu_ras_is_poison_mode_supported(adev) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) ||
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison) == 1)));
+}
+
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+ if (umc_v12_0_is_deferred_error(adev, mc_umc_status))
+ return false;
+
+ return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
+}
+
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+ if (umc_v12_0_is_deferred_error(adev, mc_umc_status))
+ return false;
+
+ return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0) ||
+ /* Identify data parity error in replay mode */
+ ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
+ !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)))));
+}
+
+static void umc_v12_0_query_error_count_per_type(struct amdgpu_device *adev,
+ uint64_t umc_reg_offset,
+ unsigned long *error_count,
+ check_error_type_func error_type_func)
+{
+ uint64_t mc_umc_status;
+ uint64_t mc_umc_status_addr;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* Check MCUMC_STATUS */
+ mc_umc_status =
+ RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (error_type_func(adev, mc_umc_status))
+ *error_count += 1;
+}
+
+static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ unsigned long ue_count = 0, ce_count = 0, de_count = 0;
+
+ /* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3],
+ * which can be used as die ID directly */
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = node_inst,
+ };
+
+ uint64_t umc_reg_offset =
+ get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+ &ce_count, umc_v12_0_is_correctable_error);
+ umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+ &ue_count, umc_v12_0_is_uncorrectable_error);
+ umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+ &de_count, umc_v12_0_is_deferred_error);
+
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+ amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+ amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, de_count);
+
+ return 0;
+}
+
+static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_query_error_count, ras_error_status);
+
+ umc_v12_0_reset_error_count(adev);
+}
+
+static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ uint32_t vram_type = adev->gmc.vram_type;
+ struct amdgpu_umc_flip_bits *flip_bits = &(adev->umc.flip_bits);
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* default setting */
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT;
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT;
+ flip_bits->flip_row_bit = 13;
+ flip_bits->bit_num = 4;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT;
+
+ if (nps == AMDGPU_NPS2_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT;
+ } else if (nps == AMDGPU_NPS4_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT;
+ }
+
+ switch (vram_type) {
+ case AMDGPU_VRAM_TYPE_HBM:
+ /* other nps modes are taken as nps1 */
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+
+ break;
+ case AMDGPU_VRAM_TYPE_HBM3E:
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ flip_bits->flip_row_bit = 12;
+
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT;
+
+ break;
+ default:
+ dev_warn(adev->dev,
+ "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
+ break;
+ }
+
+ adev->umc.retire_unit = 0x1 << flip_bits->bit_num;
+}
+
+static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr)
+{
+ uint32_t col, col_lower, row, row_lower, row_high, bank;
+ uint32_t channel_index = 0, umc_inst = 0;
+ uint32_t i, bit_num, retire_unit, *flip_bits;
+ uint64_t soc_pa, column, err_addr;
+ struct ta_ras_query_address_output addr_out_tmp;
+ struct ta_ras_query_address_output *paddr_out;
+ int ret = 0;
+
+ if (!addr_out)
+ paddr_out = &addr_out_tmp;
+ else
+ paddr_out = addr_out;
+
+ err_addr = bank = 0;
+ if (addr_in) {
+ err_addr = addr_in->ma.err_addr;
+ addr_in->addr_type = TA_RAS_MCA_TO_PA;
+ ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out);
+ if (ret) {
+ dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
+ err_addr);
+
+ goto out;
+ }
+
+ bank = paddr_out->pa.bank;
+ /* no need to care about umc inst if addr_in is NULL */
+ umc_inst = addr_in->ma.umc_inst;
+ }
+
+ flip_bits = adev->umc.flip_bits.flip_bits_in_pa;
+ bit_num = adev->umc.flip_bits.bit_num;
+ retire_unit = adev->umc.retire_unit;
+
+ soc_pa = paddr_out->pa.pa;
+ channel_index = paddr_out->pa.channel_idx;
+ /* clear loop bits in soc physical address */
+ for (i = 0; i < bit_num; i++)
+ soc_pa &= ~BIT_ULL(flip_bits[i]);
+
+ paddr_out->pa.pa = soc_pa;
+ /* get column bit 0 and 1 in mca address */
+ col_lower = (err_addr >> 1) & 0x3ULL;
+ /* extra row bit will be handled later */
+ row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL;
+ row_lower &= ~BIT_ULL(adev->umc.flip_bits.flip_row_bit);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 5, 0)) {
+ row_high = (soc_pa >> adev->umc.flip_bits.r13_in_pa) & 0x3ULL;
+ /* it's 2.25GB in each channel, from MCA address to PA
+ * [R14 R13] is converted if the two bits value are 0x3,
+ * get them from PA instead of MCA address.
+ */
+ row_lower |= (row_high << 13);
+ }
+
+ if (!err_data && !dump_addr)
+ goto out;
+
+ /* loop for all possibilities of retired bits */
+ for (column = 0; column < retire_unit; column++) {
+ soc_pa = paddr_out->pa.pa;
+ for (i = 0; i < bit_num; i++)
+ soc_pa |= (((column >> i) & 0x1ULL) << flip_bits[i]);
+
+ col = ((column & 0x7) << 2) | col_lower;
+ /* handle extra row bit */
+ if (bit_num == RETIRE_FLIP_BITS_NUM)
+ row = ((column >> 3) << adev->umc.flip_bits.flip_row_bit) |
+ row_lower;
+
+ if (dump_addr)
+ dev_info(adev->dev,
+ "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
+ soc_pa, row, col, bank, channel_index);
+
+ if (err_data)
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ soc_pa, channel_index, umc_inst);
+ }
+
+out:
+ return ret;
+}
+
+static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ struct ta_ras_query_address_input addr_in;
+ uint64_t mc_umc_status_addr;
+ uint64_t mc_umc_status, err_addr;
+ uint64_t mc_umc_addrt0;
+ uint64_t umc_reg_offset =
+ get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+ return 0;
+ }
+
+ /* calculate error address if ue error is detected */
+ if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) ||
+ umc_v12_0_is_deferred_error(adev, mc_umc_status)) {
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+ err_addr = RREG64_PCIE_EXT((mc_umc_addrt0 + umc_reg_offset) * 4);
+
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id)
+ addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev);
+ else
+ addr_in.ma.socket_id = 0;
+
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = ch_inst;
+ addr_in.ma.umc_inst = umc_inst;
+ addr_in.ma.node_inst = node_inst;
+
+ umc_v12_0_convert_error_address(adev, err_data, &addr_in, NULL, true);
+ }
+
+ /* clear umc status */
+ WREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+ return 0;
+}
+
+static void umc_v12_0_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_query_error_address, ras_error_status);
+}
+
+static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t odecc_cnt_sel;
+ uint64_t odecc_cnt_sel_addr, odecc_err_cnt_addr;
+ uint64_t umc_reg_offset =
+ get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ odecc_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccCntSel);
+ odecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccErrCnt);
+
+ odecc_cnt_sel = RREG32_PCIE_EXT((odecc_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set ce error interrupt type to APIC based interrupt */
+ odecc_cnt_sel = REG_SET_FIELD(odecc_cnt_sel, UMCCH0_OdEccCntSel,
+ OdEccErrInt, 0x1);
+ WREG32_PCIE_EXT((odecc_cnt_sel_addr + umc_reg_offset) * 4, odecc_cnt_sel);
+
+ /* set error count to initial value */
+ WREG32_PCIE_EXT((odecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V12_0_CE_CNT_INIT);
+
+ return 0;
+}
+
+static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, void *ras_error_status)
+{
+ uint64_t mc_umc_status = *(uint64_t *)ras_error_status;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status);
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ return umc_v12_0_is_correctable_error(adev, mc_umc_status);
+ case AMDGPU_MCA_ERROR_TYPE_DE:
+ return umc_v12_0_is_deferred_error(adev, mc_umc_status);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_err_cnt_init_per_channel, NULL);
+}
+
+static bool umc_v12_0_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /*
+ * Force return true, because regUMCCH0_EccCtrl
+ * is not accessible from host side
+ */
+ return true;
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = {
+ .query_ras_error_count = umc_v12_0_query_ras_error_count,
+ .query_ras_error_address = umc_v12_0_query_ras_error_address,
+};
+
+static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct amdgpu_device *adev = handle->adev;
+ struct aca_bank_info info;
+ enum aca_error_type err_type;
+ u64 status, count;
+ u32 ext_error_code;
+ int ret;
+
+ status = bank->regs[ACA_REG_IDX_STATUS];
+ if (umc_v12_0_is_deferred_error(adev, status))
+ err_type = ACA_ERROR_TYPE_DEFERRED;
+ else if (umc_v12_0_is_uncorrectable_error(adev, status))
+ err_type = ACA_ERROR_TYPE_UE;
+ else if (umc_v12_0_is_correctable_error(adev, status))
+ err_type = ACA_ERROR_TYPE_CE;
+ else
+ return 0;
+ bank->aca_err_type = err_type;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ amdgpu_umc_update_ecc_status(adev,
+ bank->regs[ACA_REG_IDX_STATUS],
+ bank->regs[ACA_REG_IDX_IPID],
+ bank->regs[ACA_REG_IDX_ADDR]);
+
+ ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+ if (umc_v12_0_is_deferred_error(adev, status))
+ count = ext_error_code == 0 ?
+ adev->umc.err_addr_cnt / adev->umc.retire_unit : 1ULL;
+ else
+ count = ext_error_code == 0 ?
+ ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
+
+ return aca_error_cache_log_bank_error(handle, &info, err_type, count);
+}
+
+static const struct aca_bank_ops umc_v12_0_aca_bank_ops = {
+ .aca_bank_parser = umc_v12_0_aca_bank_parser,
+};
+
+const struct aca_info umc_v12_0_aca_info = {
+ .hwip = ACA_HWIP_TYPE_UMC,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK | ACA_ERROR_DEFERRED_MASK,
+ .bank_ops = &umc_v12_0_aca_bank_ops,
+};
+
+static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int ret;
+
+ ret = amdgpu_umc_ras_late_init(adev, ras_block);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__UMC,
+ &umc_v12_0_aca_info, NULL);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint16_t hwid, mcatype;
+ uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+ uint64_t err_addr, pa_addr = 0;
+ struct ras_ecc_err *ecc_err;
+ struct ta_ras_query_address_output addr_out;
+ uint32_t shift_bit = adev->umc.flip_bits.flip_bits_in_pa[2];
+ int count, ret, i;
+
+ hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
+ mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
+
+ /* The IP block decode of consumption is SMU */
+ if (hwid != MCA_UMC_HWID_V12_0 || mcatype != MCA_UMC_MCATYPE_V12_0) {
+ con->umc_ecc_log.consumption_q_count++;
+ return 0;
+ }
+
+ if (!status)
+ return 0;
+
+ if (!umc_v12_0_is_deferred_error(adev, status))
+ return 0;
+
+ err_addr = REG_GET_FIELD(addr,
+ MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ dev_dbg(adev->dev,
+ "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, err_addr:0x%llx\n",
+ ipid,
+ MCA_IPID_2_SOCKET_ID(ipid),
+ MCA_IPID_2_DIE_ID(ipid),
+ MCA_IPID_2_UMC_INST(ipid),
+ MCA_IPID_2_UMC_CH(ipid),
+ err_addr);
+
+ ret = amdgpu_umc_mca_to_addr(adev,
+ err_addr, MCA_IPID_2_UMC_CH(ipid),
+ MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid),
+ MCA_IPID_2_SOCKET_ID(ipid), &addr_out, true);
+ if (ret)
+ return ret;
+
+ ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL);
+ if (!ecc_err)
+ return -ENOMEM;
+
+ pa_addr = addr_out.pa.pa;
+ ecc_err->status = status;
+ ecc_err->ipid = ipid;
+ ecc_err->addr = addr;
+ ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->channel_idx = addr_out.pa.channel_idx;
+
+ /* If converted pa_pfn is 0, use pa C4 pfn. */
+ if (!ecc_err->pa_pfn)
+ ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT;
+
+ ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err);
+ if (ret) {
+ if (ret == -EEXIST)
+ con->umc_ecc_log.de_queried_count++;
+ else
+ dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret);
+
+ kfree(ecc_err);
+ return ret;
+ }
+
+ con->umc_ecc_log.de_queried_count++;
+
+ memset(page_pfn, 0, sizeof(page_pfn));
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ pa_addr,
+ page_pfn, ARRAY_SIZE(page_pfn));
+ if (count <= 0) {
+ dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count);
+ return 0;
+ }
+
+ /* Reserve memory */
+ for (i = 0; i < count; i++)
+ amdgpu_ras_reserve_page(adev, page_pfn[i]);
+
+ /* The problem case is as follows:
+ * 1. GPU A triggers a gpu ras reset, and GPU A drives
+ * GPU B to also perform a gpu ras reset.
+ * 2. After gpu B ras reset started, gpu B queried a DE
+ * data. Since the DE data was queried in the ras reset
+ * thread instead of the page retirement thread, bad
+ * page retirement work would not be triggered. Then
+ * even if all gpu resets are completed, the bad pages
+ * will be cached in RAM until GPU B's bad page retirement
+ * work is triggered again and then saved to eeprom.
+ * Trigger delayed work to save the bad pages to eeprom in time
+ * after gpu ras reset is completed.
+ */
+ if (amdgpu_ras_in_recovery(adev))
+ schedule_delayed_work(&con->page_retirement_dwork,
+ msecs_to_jiffies(DELAYED_TIME_FOR_GPU_RESET));
+
+ return 0;
+}
+
+static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
+ struct ras_ecc_err *ecc_err, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+ int ret, i, count;
+
+ if (!err_data || !ecc_err)
+ return -EINVAL;
+
+ memset(page_pfn, 0, sizeof(page_pfn));
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT,
+ page_pfn, ARRAY_SIZE(page_pfn));
+
+ for (i = 0; i < count; i++) {
+ ret = amdgpu_umc_fill_error_record(err_data,
+ ecc_err->addr,
+ page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT,
+ ecc_err->channel_idx,
+ MCA_IPID_2_UMC_INST(ecc_err->ipid));
+ if (ret)
+ break;
+ }
+
+ err_data->de_count++;
+
+ return ret;
+}
+
+static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_ecc_err *entries[MAX_ECC_NUM_PER_RETIREMENT];
+ struct radix_tree_root *ecc_tree;
+ int new_detected, ret, i;
+
+ ecc_tree = &con->umc_ecc_log.de_page_tree;
+
+ mutex_lock(&con->umc_ecc_log.lock);
+ new_detected = radix_tree_gang_lookup_tag(ecc_tree, (void **)entries,
+ 0, ARRAY_SIZE(entries), UMC_ECC_NEW_DETECTED_TAG);
+ for (i = 0; i < new_detected; i++) {
+ if (!entries[i])
+ continue;
+
+ ret = umc_v12_0_fill_error_record(adev, entries[i], ras_error_status);
+ if (ret) {
+ dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret);
+ break;
+ }
+ radix_tree_tag_clear(ecc_tree,
+ entries[i]->pa_pfn, UMC_ECC_NEW_DETECTED_TAG);
+ }
+ mutex_unlock(&con->umc_ecc_log.lock);
+}
+
+static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page)
+{
+ uint32_t die = 0;
+
+ /* we only calculate die id for nps1 mode right now */
+ die += ((((retired_page >> 12) & 0x1ULL)^
+ ((retired_page >> 20) & 0x1ULL) ^
+ ((retired_page >> 27) & 0x1ULL) ^
+ ((retired_page >> 34) & 0x1ULL) ^
+ ((retired_page >> 41) & 0x1ULL)) << 0);
+
+ /* the original PA_C4 and PA_R13 may be cleared in retired_page, so
+ * get them from mca_addr.
+ */
+ die += ((((retired_page >> 13) & 0x1ULL) ^
+ ((mca_addr >> 5) & 0x1ULL) ^
+ ((retired_page >> 28) & 0x1ULL) ^
+ ((mca_addr >> 23) & 0x1ULL) ^
+ ((retired_page >> 42) & 0x1ULL)) << 1);
+ die &= 3;
+
+ return die;
+}
+
+static void umc_v12_0_mca_ipid_parse(struct amdgpu_device *adev, uint64_t ipid,
+ uint32_t *did, uint32_t *ch, uint32_t *umc_inst, uint32_t *sid)
+{
+ if (did)
+ *did = MCA_IPID_2_DIE_ID(ipid);
+ if (ch)
+ *ch = MCA_IPID_2_UMC_CH(ipid);
+ if (umc_inst)
+ *umc_inst = MCA_IPID_2_UMC_INST(ipid);
+ if (sid)
+ *sid = MCA_IPID_2_SOCKET_ID(ipid);
+}
+
+struct amdgpu_umc_ras umc_v12_0_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v12_0_ras_hw_ops,
+ .ras_late_init = umc_v12_0_ras_late_init,
+ },
+ .err_cnt_init = umc_v12_0_err_cnt_init,
+ .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
+ .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr,
+ .check_ecc_err_status = umc_v12_0_check_ecc_err_status,
+ .update_ecc_status = umc_v12_0_update_ecc_status,
+ .convert_ras_err_addr = umc_v12_0_convert_error_address,
+ .get_die_id_from_pa = umc_v12_0_get_die_id,
+ .get_retire_flip_bits = umc_v12_0_get_retire_flip_bits,
+ .mca_ipid_parse = umc_v12_0_mca_ipid_parse,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
new file mode 100644
index 000000000000..63b7e7254526
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V12_0_H__
+#define __UMC_V12_0_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+#define UMC_V12_0_NODE_DIST 0x40000000
+#define UMC_V12_0_INST_DIST 0x40000
+
+/* UMC register per channel offset */
+#define UMC_V12_0_PER_CHANNEL_OFFSET 0x400
+
+/* UMC cross node offset */
+#define UMC_V12_0_CROSS_NODE_OFFSET 0x100000000
+
+/* OdEccErrCnt max value */
+#define UMC_V12_0_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V12_0_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V12_0_CE_CNT_INIT (UMC_V12_0_CE_CNT_MAX - UMC_V12_0_CE_INT_THRESHOLD)
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V12_0_CHANNEL_INSTANCE_NUM 8
+/* number of umc instance with memory map register access */
+#define UMC_V12_0_UMC_INSTANCE_NUM 4
+
+/* Total channel instances for all available umc nodes */
+#define UMC_V12_0_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V12_0_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+
+/* one piece of normalized address is mapped to 8 pieces of physical address */
+#define UMC_V12_0_NA_MAP_PA_NUM 8
+/* R13 bit shift should be considered, double the number */
+#define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2)
+
+/* column bits in SOC physical address */
+#define UMC_V12_0_PA_C2_BIT 15
+#define UMC_V12_0_PA_C3_BIT 16
+#define UMC_V12_0_PA_C4_BIT 21
+/* row bits in SOC physical address */
+#define UMC_V12_0_PA_R0_BIT 22
+#define UMC_V12_0_PA_R10_BIT 32
+#define UMC_V12_0_PA_R11_BIT 33
+#define UMC_V12_0_PA_R12_BIT 34
+#define UMC_V12_0_PA_R13_BIT 35
+/* channel bit in SOC physical address */
+#define UMC_V12_0_PA_CH4_BIT 12
+#define UMC_V12_0_PA_CH5_BIT 13
+/* bank bit in SOC physical address */
+#define UMC_V12_0_PA_B0_BIT 19
+#define UMC_V12_0_PA_B1_BIT 20
+/* row bits in MCA address */
+#define UMC_V12_0_MA_R0_BIT 10
+
+#define MCA_UMC_HWID_V12_0 0x96
+#define MCA_UMC_MCATYPE_V12_0 0x0
+
+#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
+ (((_ipid_lo) >> 12) & 0xF))
+#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
+
+#define MCA_IPID_2_DIE_ID(ipid) ((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) >> 2) & 0x03)
+
+#define MCA_IPID_2_UMC_CH(ipid) \
+ (MCA_IPID_LO_2_UMC_CH(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo)))
+
+#define MCA_IPID_2_UMC_INST(ipid) \
+ (MCA_IPID_LO_2_UMC_INST(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo)))
+
+#define MCA_IPID_2_SOCKET_ID(ipid) \
+ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \
+ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03))
+
+bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+
+typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status);
+
+extern struct amdgpu_umc_ras umc_v12_0_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
index 0d6b50528d76..97fa88ed770c 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
@@ -25,7 +25,7 @@
static void umc_v6_0_init_registers(struct amdgpu_device *adev)
{
- unsigned i,j;
+ unsigned i, j;
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 96d7769609f4..f17d297b594b 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -22,6 +22,7 @@
*/
#include "umc_v6_1.h"
#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
#include "amdgpu.h"
#include "rsmu/rsmu_0_0_2_offset.h"
@@ -299,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
{
uint32_t lsb, mc_umc_status_addr;
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
- struct eeprom_table_record *err_rec;
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
if (adev->asic_type == CHIP_ARCTURUS) {
@@ -327,12 +327,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
return;
}
- err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
/* the lowest lsb bits should be ignored */
@@ -345,20 +342,8 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(err_addr);
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1) {
- err_rec->address = err_addr;
- /* page frame address is saved */
- err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
- err_rec->ts = (uint64_t)ktime_get_real_seconds();
- err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
- err_rec->cu = 0;
- err_rec->mem_channel = channel_index;
- err_rec->mcumc_id = umc_inst;
-
- err_data->err_addr_cnt++;
- }
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
}
/* clear umc status */
@@ -464,9 +449,14 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
umc_v6_1_enable_umc_index_mode(adev);
}
-const struct amdgpu_umc_funcs umc_v6_1_funcs = {
- .err_cnt_init = umc_v6_1_err_cnt_init,
- .ras_late_init = amdgpu_umc_ras_late_init,
+const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = {
.query_ras_error_count = umc_v6_1_query_ras_error_count,
.query_ras_error_address = umc_v6_1_query_ras_error_address,
};
+
+struct amdgpu_umc_ras umc_v6_1_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v6_1_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v6_1_err_cnt_init,
+}; \ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
index 0ce1d323cfdd..50c632eb4cc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
@@ -45,7 +45,7 @@
/* umc ce count initial value */
#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
-extern const struct amdgpu_umc_funcs umc_v6_1_funcs;
+extern struct amdgpu_umc_ras umc_v6_1_ras;
extern const uint32_t
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
new file mode 100644
index 000000000000..a3ee3c4c650f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_7.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+
+#include "umc/umc_6_7_0_offset.h"
+#include "umc/umc_6_7_0_sh_mask.h"
+
+const uint32_t
+ umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
+ {28, 20, 24, 16, 12, 4, 8, 0},
+ {6, 30, 2, 26, 22, 14, 18, 10},
+ {19, 11, 15, 7, 3, 27, 31, 23},
+ {9, 1, 5, 29, 25, 17, 21, 13}
+};
+const uint32_t
+ umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
+ {19, 11, 15, 7, 3, 27, 31, 23},
+ {9, 1, 5, 29, 25, 17, 21, 13},
+ {28, 20, 24, 16, 12, 4, 8, 0},
+ {6, 30, 2, 26, 22, 14, 18, 10},
+};
+
+static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst;
+
+ /* adjust umc and channel index offset,
+ * the register address is not linear on each umc instace */
+ umc_inst = index / 4;
+ ch_inst = index % 4;
+
+ return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst;
+}
+
+static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
+ uint64_t mc_umc_status, uint32_t umc_reg_offset)
+{
+ uint32_t mc_umc_addr;
+ uint64_t reg_value;
+
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
+ dev_info(adev->dev, "Deferred error\n");
+
+ if (mc_umc_status)
+ dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
+
+ /* print IPID registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+ /* print SYND registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+ /* print MISC0 registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+}
+
+static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ uint32_t umc_reg_offset;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev,
+ umc_inst, ch_inst);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+
+ if (ras->umc_ecc.record_ce_addr_supported) {
+ uint64_t err_addr, soc_pa;
+ uint32_t channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+ }
+ }
+}
+
+static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ uint32_t umc_reg_offset;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev,
+ umc_inst, ch_inst);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ /* check the MCUMC_STATUS */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+ }
+}
+
+static int umc_v6_7_ecc_info_querry_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+ umc_v6_7_ecc_info_query_correctable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ce_count));
+
+ umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_ecc_info_querry_ecc_error_count, ras_error_status);
+}
+
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ uint32_t channel_index;
+ uint64_t soc_pa, retired_page, column;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ /* clear [C4 C3 C2] in soc physical address */
+ soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
+
+ /* loop for all possibilities of [C4 C3 C2] */
+ for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
+ retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+
+ /* shift R14 bit */
+ retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+ }
+}
+
+static int umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint64_t mc_umc_status, err_addr;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr)
+ return 0;
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v6_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
+ }
+
+ return 0;
+}
+
+static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_ecc_info_query_error_address, ras_error_status);
+}
+
+static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_7_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_7_CE_CNT_INIT);
+
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+
+ {
+ uint64_t err_addr, soc_pa;
+ uint32_t mc_umc_addrt0;
+ uint32_t channel_index;
+
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+ }
+ }
+}
+
+static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+ }
+}
+
+static int umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ regUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ regUMCCH0_0_EccErrCnt);
+
+ /* select the lower chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear lower chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V6_7_CE_CNT_INIT);
+
+ /* select the higher chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear higher chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V6_7_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v6_7_reset_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_reset_error_count_per_channel, NULL);
+}
+
+static int umc_v6_7_query_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
+
+ umc_v6_7_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count),
+ ch_inst, umc_inst);
+
+ umc_v6_7_querry_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_query_ecc_error_count, ras_error_status);
+
+ umc_v6_7_reset_error_count(adev);
+}
+
+static int umc_v6_7_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t mc_umc_status_addr;
+ uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr;
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+ return 0;
+ }
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr =
+ REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v6_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
+ }
+
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+ return 0;
+}
+
+static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_query_error_address, ras_error_status);
+}
+
+static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
+ struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_ctrl_addr, ecc_ctrl;
+
+ ecc_ctrl_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccCtrl);
+ ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr +
+ umc_reg_offset) * 4);
+
+ return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_EccCtrl, UCFatalEn);
+}
+
+static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t umc_reg_offset = 0;
+
+ /* Enabling fatal error in umc instance0 channel0 will be
+ * considered as fatal error mode
+ */
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev, 0, 0);
+ return !umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
+ .query_ras_error_count = umc_v6_7_query_ras_error_count,
+ .query_ras_error_address = umc_v6_7_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v6_7_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v6_7_ras_hw_ops,
+ },
+ .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
+ .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
+ .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
new file mode 100644
index 000000000000..105245d5b6e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_7_H__
+#define __UMC_V6_7_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* EccErrCnt max value */
+#define UMC_V6_7_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V6_7_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V6_7_CE_CNT_INIT (UMC_V6_7_CE_CNT_MAX - UMC_V6_7_CE_INT_THRESHOLD)
+
+#define UMC_V6_7_INST_DIST 0x40000
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V6_7_UMC_INSTANCE_NUM 4
+/* number of umc instance with memory map register access */
+#define UMC_V6_7_CHANNEL_INSTANCE_NUM 8
+/* total channel instances in one umc block */
+#define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM)
+/* one piece of normalizing address is mapped to 8 pieces of physical address */
+#define UMC_V6_7_NA_MAP_PA_NUM 8
+/* R14 bit shift should be considered, double the number */
+#define UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL (UMC_V6_7_NA_MAP_PA_NUM * 2)
+/* The CH4 bit in SOC physical address */
+#define UMC_V6_7_PA_CH4_BIT 12
+/* The C2 bit in SOC physical address */
+#define UMC_V6_7_PA_C2_BIT 17
+/* The R14 bit in SOC physical address */
+#define UMC_V6_7_PA_R14_BIT 34
+/* UMC regiser per channel offset */
+#define UMC_V6_7_PER_CHANNEL_OFFSET 0x400
+
+/* XOR bit 20, 25, 34 of PA into CH4 bit (bit 12 of PA),
+ * hash bit is only effective when related setting is enabled
+ */
+#define CHANNEL_HASH(channel_idx, pa) (((channel_idx) >> 4) ^ \
+ (((pa) >> 20) & 0x1ULL & adev->df.hash_status.hash_64k) ^ \
+ (((pa) >> 25) & 0x1ULL & adev->df.hash_status.hash_2m) ^ \
+ (((pa) >> 34) & 0x1ULL & adev->df.hash_status.hash_1g))
+#define SET_CHANNEL_HASH(channel_idx, pa) do { \
+ (pa) &= ~(0x1ULL << UMC_V6_7_PA_CH4_BIT); \
+ (pa) |= (CHANNEL_HASH(channel_idx, pa) << UMC_V6_7_PA_CH4_BIT); \
+ } while (0)
+
+extern struct amdgpu_umc_ras umc_v6_7_ras;
+extern const uint32_t
+ umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
+extern const uint32_t
+ umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
new file mode 100644
index 000000000000..a32f87992f20
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_10.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_10_0_offset.h"
+#include "umc/umc_8_10_0_sh_mask.h"
+
+#define UMC_8_NODE_DIST 0x800000
+#define UMC_8_INST_DIST 0x4000
+
+struct channelnum_map_colbit {
+ uint32_t channel_num;
+ uint32_t col_bit;
+};
+
+const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = {
+ {24, 13},
+ {20, 13},
+ {16, 12},
+ {14, 12},
+ {12, 12},
+ {10, 12},
+ {6, 11},
+};
+
+const uint32_t
+ umc_v8_10_channel_idx_tbl_ext0[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
+ {{1, 5}, {7, 3}},
+ {{14, 15}, {13, 12}},
+ {{10, 11}, {9, 8}},
+ {{6, 2}, {0, 4}}
+ };
+
+const uint32_t
+ umc_v8_10_channel_idx_tbl[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
+ {{16, 18}, {17, 19}},
+ {{15, 11}, {3, 7}},
+ {{1, 5}, {13, 9}},
+ {{23, 21}, {22, 20}},
+ {{0, 4}, {12, 8}},
+ {{14, 10}, {2, 6}}
+ };
+
+static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
+ uint32_t node_inst,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst +
+ UMC_8_NODE_DIST * node_inst;
+}
+
+static int umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+
+ /* clear error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_10_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_clear_error_count_per_channel, NULL);
+}
+
+static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ /* UMC 8_10 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* Rely on MCUMC_STATUS for correctable error counter
+ * MCUMC_STATUS is a 64 bit register
+ */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* Check the MCUMC_STATUS. */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ umc_v8_10_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v8_10_query_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_query_ecc_error_count, ras_error_status);
+
+ umc_v8_10_clear_error_count(adev);
+}
+
+static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num)
+{
+ uint32_t t = 0;
+
+ for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++)
+ if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num)
+ return umc_v8_10_channelnum_map_colbit_table[t].col_bit;
+
+ /* Failed to get col_bit. */
+ return U32_MAX;
+}
+
+/*
+ * Mapping normal address to soc physical address in swizzle mode.
+ */
+static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev,
+ uint32_t channel_idx,
+ uint64_t na, uint64_t *soc_pa)
+{
+ uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
+ uint32_t col_bit = umc_v8_10_get_col_bit(channel_num);
+ uint64_t tmp_addr;
+
+ if (col_bit == U32_MAX)
+ return -1;
+
+ tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx);
+ *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) |
+ SWIZZLE_MODE_ADDR_MID(na, col_bit) |
+ SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) |
+ SWIZZLE_MODE_ADDR_LSB(na);
+
+ return 0;
+}
+
+static void umc_v8_10_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst,
+ uint32_t node_inst, uint64_t mc_umc_status)
+{
+ uint64_t na_err_addr_base;
+ uint64_t na_err_addr, retired_page_addr;
+ uint32_t channel_index, addr_lsb, col = 0;
+ int ret = 0;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst];
+
+ /* the lowest lsb bits should be ignored */
+ addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
+ err_addr &= ~((0x1ULL << addr_lsb) - 1);
+ na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
+
+ /* loop for all possibilities of [C6 C5] in normal address. */
+ for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
+ na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
+
+ /* Mapping normal error address to retired soc physical address. */
+ ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
+ na_err_addr, &retired_page_addr);
+ if (ret) {
+ dev_err(adev->dev, "Failed to map pa from umc na.\n");
+ break;
+ }
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
+ retired_page_addr);
+ amdgpu_umc_fill_error_record(err_data, na_err_addr,
+ retired_page_addr, channel_index, umc_inst);
+ }
+}
+
+static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint64_t mc_umc_status_addr;
+ uint64_t mc_umc_status, err_addr;
+ uint64_t mc_umc_addrt0;
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+ return 0;
+ }
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v8_10_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst, node_inst, mc_umc_status);
+ }
+
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+ return 0;
+}
+
+static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_query_error_address, ras_error_status);
+}
+
+static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
+ GeccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_err_cnt_init_per_channel, NULL);
+}
+
+static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /*
+ * Force return true, because UMCCH0_0_GeccCtrl
+ * is not accessible from host side
+ */
+ return true;
+}
+
+static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint16_t ecc_ce_cnt;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst;
+
+ /* Retrieve CE count */
+ ecc_ce_cnt = ras->umc_ecc.ecc[eccinfo_table_idx].ce_count_lo_chip;
+ if (ecc_ce_cnt)
+ *error_count += ecc_ce_cnt;
+}
+
+static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst;
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
+ *error_count += 1;
+ }
+}
+
+static int umc_v8_10_ecc_info_query_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+ umc_v8_10_ecc_info_query_correctable_error_count(adev,
+ node_inst, umc_inst, ch_inst,
+ &(err_data->ce_count));
+ umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
+ node_inst, umc_inst, ch_inst,
+ &(err_data->ue_count));
+ return 0;
+}
+
+static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_ecc_info_query_ecc_error_count, ras_error_status);
+}
+
+static int umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t eccinfo_table_idx;
+ uint64_t mc_umc_status, err_addr;
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst;
+
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr)
+ return 0;
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1)) {
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v8_10_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst, node_inst, mc_umc_status);
+ }
+
+ return 0;
+}
+
+static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_ecc_info_query_error_address, ras_error_status);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
+ .query_ras_error_count = umc_v8_10_query_ras_error_count,
+ .query_ras_error_address = umc_v8_10_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v8_10_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_10_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_10_err_cnt_init,
+ .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode,
+ .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count,
+ .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
new file mode 100644
index 000000000000..dc12e0af5451
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_10_H__
+#define __UMC_V8_10_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2
+/* number of umc instance with memory map register access */
+#define UMC_V8_10_UMC_INSTANCE_NUM 2
+
+/* Total channel instances for all available umc nodes */
+#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * \
+ (adev)->gmc.num_umc - hweight32((adev)->gmc.m_half_use) * 2)
+
+/* UMC regiser per channel offset */
+#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400
+
+/* EccErrCnt max value */
+#define UMC_V8_10_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD)
+
+#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4
+
+/* The C5 bit in NA address */
+#define UMC_V8_10_NA_C5_BIT 14
+
+/* Map to swizzle mode address */
+#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \
+ ((((na) >> 10) * (ch_num) + (ch_idx)) << 10)
+#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \
+ (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2))
+#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit))
+#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \
+ ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8)
+#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF)
+
+extern struct amdgpu_umc_ras umc_v8_10_ras;
+extern const uint32_t
+ umc_v8_10_channel_idx_tbl[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM];
+
+extern const uint32_t
+ umc_v8_10_channel_idx_tbl_ext0[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM];
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
new file mode 100644
index 000000000000..eaca10a3c4a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_14.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_14_0_offset.h"
+#include "umc/umc_8_14_0_sh_mask.h"
+
+static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst;
+}
+
+static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ /* clear error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_14_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_14_clear_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_clear_error_count_per_channel, NULL);
+}
+
+static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+
+ /* UMC 8_14 registers */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccErrCnt) -
+ UMC_V8_14_CE_CNT_INIT);
+}
+
+static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ /* UMC 8_14 registers */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) -
+ UMC_V8_14_CE_CNT_INIT);
+}
+
+static int umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ umc_v8_14_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v8_14_query_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_query_error_count_per_channel, ras_error_status);
+
+ umc_v8_14_clear_error_count(adev);
+}
+
+static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_GeccErrCntSel,
+ GeccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_14_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_err_cnt_init_per_channel, NULL);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = {
+ .query_ras_error_count = umc_v8_14_query_ras_error_count,
+};
+
+struct amdgpu_umc_ras umc_v8_14_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_14_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_14_err_cnt_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
new file mode 100644
index 000000000000..20a258f0017a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_14_H__
+#define __UMC_V8_14_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V8_14_CHANNEL_INSTANCE_NUM 2
+/* number of umc instance with memory map register access */
+#define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num)
+
+/* Total channel instances for all available umc nodes */
+#define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+
+/* UMC register per channel offset */
+#define UMC_V8_14_PER_CHANNEL_OFFSET 0x400
+
+#define UMC_V8_14_INST_DIST 0x40000
+
+/* EccErrCnt max value */
+#define UMC_V8_14_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V8_14_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD)
+
+extern struct amdgpu_umc_ras umc_v8_14_ras;
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index a064c097690c..b717fdaa46e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -22,6 +22,7 @@
*/
#include "umc_v8_7.h"
#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
#include "amdgpu.h"
#include "rsmu/rsmu_0_0_2_offset.h"
@@ -39,13 +40,143 @@ const uint32_t
{9, 0}, {15, 6}
};
-static inline uint32_t get_umc_8_reg_offset(struct amdgpu_device *adev,
+static inline uint32_t get_umc_v8_7_reg_offset(struct amdgpu_device *adev,
uint32_t umc_inst,
uint32_t ch_inst)
{
return adev->umc.channel_offs*ch_inst + UMC_8_INST_DIST*umc_inst;
}
+static void umc_v8_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+
+ /* check for SRAM correctable error
+ * MCUMC_STATUS is a 64 bit register
+ */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+static void umc_v8_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+
+ /* TODO: driver needs to toggle DF Cstate to ensure
+ * safe access of UMC registers. Will add the protection
+ */
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_v8_7_ecc_info_query_correctable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ce_count));
+ umc_v8_7_ecc_info_querry_uncorrectable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ue_count));
+ }
+}
+
+static void umc_v8_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ uint64_t retired_page;
+ uint32_t channel_index;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+}
+
+static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint64_t mc_umc_status, err_addr;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+
+ if (mc_umc_status == 0)
+ return;
+
+ if (!err_data->err_addr)
+ return;
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v8_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
+ }
+}
+
+static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+
+ /* TODO: driver needs to toggle DF Cstate to ensure
+ * safe access of UMC resgisters. Will add the protection
+ * when firmware interface is ready
+ */
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_v8_7_ecc_info_query_error_address(adev,
+ err_data,
+ ch_inst,
+ umc_inst);
+ }
+}
+
static void umc_v8_7_clear_error_count_per_channel(struct amdgpu_device *adev,
uint32_t umc_reg_offset)
{
@@ -91,7 +222,7 @@ static void umc_v8_7_clear_error_count(struct amdgpu_device *adev)
uint32_t umc_reg_offset = 0;
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_8_reg_offset(adev,
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
umc_inst,
ch_inst);
@@ -177,7 +308,7 @@ static void umc_v8_7_query_ras_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset = 0;
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_8_reg_offset(adev,
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
umc_inst,
ch_inst);
@@ -199,15 +330,12 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
uint32_t umc_inst)
{
uint32_t lsb, mc_umc_status_addr;
- uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
- struct eeprom_table_record *err_rec;
- uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+ uint64_t mc_umc_status, err_addr, mc_umc_addrt0;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
mc_umc_addrt0 =
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
-
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (mc_umc_status == 0)
@@ -219,12 +347,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
return;
}
- err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
/* the lowest lsb bits should be ignored */
@@ -232,25 +357,8 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
err_addr &= ~((0x1ULL << lsb) - 1);
- /* translate umc channel address to soc pa, 3 parts are included */
- retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1) {
- err_rec->address = err_addr;
- /* page frame address is saved */
- err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
- err_rec->ts = (uint64_t)ktime_get_real_seconds();
- err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
- err_rec->cu = 0;
- err_rec->mem_channel = channel_index;
- err_rec->mcumc_id = umc_inst;
-
- err_data->err_addr_cnt++;
- }
+ umc_v8_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
}
/* clear umc status */
@@ -267,7 +375,7 @@ static void umc_v8_7_query_ras_error_address(struct amdgpu_device *adev,
uint32_t umc_reg_offset = 0;
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_8_reg_offset(adev,
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
umc_inst,
ch_inst);
@@ -315,7 +423,7 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
uint32_t umc_reg_offset = 0;
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_8_reg_offset(adev,
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
umc_inst,
ch_inst);
@@ -323,9 +431,16 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
}
}
-const struct amdgpu_umc_funcs umc_v8_7_funcs = {
- .err_cnt_init = umc_v8_7_err_cnt_init,
- .ras_late_init = amdgpu_umc_ras_late_init,
+const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = {
.query_ras_error_count = umc_v8_7_query_ras_error_count,
.query_ras_error_address = umc_v8_7_query_ras_error_address,
};
+
+struct amdgpu_umc_ras umc_v8_7_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_7_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_7_err_cnt_init,
+ .ecc_info_query_ras_error_count = umc_v8_7_ecc_info_query_ras_error_count,
+ .ecc_info_query_ras_error_address = umc_v8_7_ecc_info_query_ras_error_address,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
index d4d0468e3df5..dd4993f5f78f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
@@ -44,7 +44,7 @@
/* umc ce count initial value */
#define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD)
-extern const struct amdgpu_umc_funcs umc_v8_7_funcs;
+extern struct amdgpu_umc_ras umc_v8_7_ras;
extern const uint32_t
umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM];
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
new file mode 100644
index 000000000000..ce3bb12e3572
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "vcn/vcn_4_0_0_offset.h"
+#include "vcn/vcn_4_0_0_sh_mask.h"
+
+#include "amdgpu_umsch_mm.h"
+#include "umsch_mm_4_0_api_def.h"
+#include "umsch_mm_v4_0.h"
+
+#define regUVD_IPX_DLDO_CONFIG 0x0064
+#define regUVD_IPX_DLDO_CONFIG_BASE_IDX 1
+#define regUVD_IPX_DLDO_STATUS 0x0065
+#define regUVD_IPX_DLDO_STATUS_BASE_IDX 1
+
+#define UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT 0x00000002
+#define UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG_MASK 0x0000000cUL
+#define UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT 0x00000001
+#define UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK 0x00000002UL
+
+static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_device *adev = umsch->ring.adev;
+ uint64_t data;
+ int r;
+
+ r = amdgpu_umsch_mm_allocate_ucode_buffer(umsch);
+ if (r)
+ return r;
+
+ r = amdgpu_umsch_mm_allocate_ucode_data_buffer(umsch);
+ if (r)
+ goto err_free_ucode_bo;
+
+ umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr;
+
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
+ WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
+ 0 << UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK);
+ }
+
+ data = RREG32_SOC15(VCN, 0, regUMSCH_MES_RESET_CTRL);
+ data = REG_SET_FIELD(data, UMSCH_MES_RESET_CTRL, MES_CORE_SOFT_RESET, 0);
+ WREG32_SOC15_UMSCH(regUMSCH_MES_RESET_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_CNTL, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, VMID, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, EXE_DISABLE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_CNTL, data);
+
+ WREG32_SOC15_UMSCH(regVCN_MES_INTR_ROUTINE_START,
+ lower_32_bits(adev->umsch_mm.irq_start_addr >> 2));
+ WREG32_SOC15_UMSCH(regVCN_MES_INTR_ROUTINE_START_HI,
+ upper_32_bits(adev->umsch_mm.irq_start_addr >> 2));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_PRGRM_CNTR_START,
+ lower_32_bits(adev->umsch_mm.uc_start_addr >> 2));
+ WREG32_SOC15_UMSCH(regVCN_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(adev->umsch_mm.uc_start_addr >> 2));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_BASE_LO, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_BASE_HI, 0);
+
+ data = adev->umsch_mm.uc_start_addr + adev->umsch_mm.ucode_size - 1;
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_MASK_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_MASK_HI, upper_32_bits(data));
+
+ data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
+ 0 : adev->umsch_mm.ucode_fw_gpu_addr;
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_HI, upper_32_bits(data));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_MIBOUND_LO, 0x1FFFFF);
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_BASE0_LO,
+ lower_32_bits(adev->umsch_mm.data_start_addr));
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_BASE0_HI,
+ upper_32_bits(adev->umsch_mm.data_start_addr));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_LO,
+ adev->umsch_mm.data_size - 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_HI, 0);
+
+ data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
+ 0 : adev->umsch_mm.data_fw_gpu_addr;
+ WREG32_SOC15_UMSCH(regVCN_MES_DC_BASE_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_DC_BASE_HI, upper_32_bits(data));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_MDBOUND_LO, 0x3FFFF);
+
+ data = RREG32_SOC15(VCN, 0, regUVD_UMSCH_FORCE);
+ data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, IC_FORCE_GPUVM, 1);
+ data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, DC_FORCE_GPUVM, 1);
+ WREG32_SOC15_UMSCH(regUVD_UMSCH_FORCE, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_OP_CNTL, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_OP_CNTL, data);
+
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, 0);
+
+#if defined(CONFIG_DEBUG_FS)
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, lower_32_bits(umsch->log_gpu_addr));
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, upper_32_bits(umsch->log_gpu_addr));
+#endif
+
+ WREG32_SOC15_UMSCH(regVCN_MES_GP1_LO, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_GP1_HI, 0);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_CNTL, data);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ amdgpu_umsch_mm_psp_execute_cmd_buf(umsch);
+
+ r = SOC15_WAIT_ON_RREG(VCN, 0, regVCN_MES_MSTATUS_LO, 0xAAAAAAAA, 0xFFFFFFFF);
+ if (r) {
+ dev_err(adev->dev, "UMSCH FW Load: Failed, regVCN_MES_MSTATUS_LO: 0x%08x\n",
+ RREG32_SOC15(VCN, 0, regVCN_MES_MSTATUS_LO));
+ goto err_free_data_bo;
+ }
+
+ return 0;
+
+err_free_data_bo:
+ amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj,
+ &adev->umsch_mm.data_fw_gpu_addr,
+ (void **)&adev->umsch_mm.data_fw_ptr);
+err_free_ucode_bo:
+ amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj,
+ &adev->umsch_mm.ucode_fw_gpu_addr,
+ (void **)&adev->umsch_mm.ucode_fw_ptr);
+ return r;
+}
+
+static void umsch_mm_v4_0_aggregated_doorbell_init(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_device *adev = umsch->ring.adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL0);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL0, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_REALTIME]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL0, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL0, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL1);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL1, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_FOCUS]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL1, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL1, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL2);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL2, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL2, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL2, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL3);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL3, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_IDLE]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL3, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL3, data);
+}
+
+static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_ring *ring = &umsch->ring;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, OFFSET, ring->doorbell_index);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
+
+ ring->wptr = 0;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
+ data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK);
+ WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
+
+ umsch_mm_v4_0_aggregated_doorbell_init(umsch);
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_ring *ring = &umsch->ring;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
+ data = REG_SET_FIELD(data, VCN_RB_ENABLE, UMSCH_RB_EN, 0);
+ WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0);
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
+
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
+ WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch)
+{
+ union UMSCHAPI__SET_HW_RESOURCES set_hw_resources = {};
+ struct amdgpu_device *adev = umsch->ring.adev;
+ int r;
+
+ set_hw_resources.header.type = UMSCH_API_TYPE_SCHEDULER;
+ set_hw_resources.header.opcode = UMSCH_API_SET_HW_RSRC;
+ set_hw_resources.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn;
+ set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe;
+ set_hw_resources.collaboration_mask_vpe =
+ adev->vpe.collaborate_mode ? 0x3 : 0x0;
+ set_hw_resources.engine_mask = umsch->engine_mask;
+
+ set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask;
+ set_hw_resources.vcn1_hqd_mask[0] = umsch->vcn1_hqd_mask;
+ set_hw_resources.vcn_hqd_mask[0] = umsch->vcn_hqd_mask[0];
+ set_hw_resources.vcn_hqd_mask[1] = umsch->vcn_hqd_mask[1];
+ set_hw_resources.vpe_hqd_mask[0] = umsch->vpe_hqd_mask;
+
+ set_hw_resources.g_sch_ctx_gpu_mc_ptr = umsch->sch_ctx_gpu_addr;
+
+ set_hw_resources.enable_level_process_quantum_check = 1;
+
+ memcpy(set_hw_resources.mmhub_base, adev->reg_offset[MMHUB_HWIP][0],
+ sizeof(uint32_t) * 5);
+ set_hw_resources.mmhub_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, MMHUB_HWIP, 0));
+
+ memcpy(set_hw_resources.osssys_base, adev->reg_offset[OSSSYS_HWIP][0],
+ sizeof(uint32_t) * 5);
+ set_hw_resources.osssys_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, OSSSYS_HWIP, 0));
+
+ set_hw_resources.vcn_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCN_HWIP, 0));
+ set_hw_resources.vpe_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0));
+
+ set_hw_resources.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ set_hw_resources.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &set_hw_resources.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS);
+ if (r)
+ return r;
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH SET_HW_RESOURCES: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_add_queue_input *input_ptr)
+{
+ struct amdgpu_device *adev = umsch->ring.adev;
+ union UMSCHAPI__ADD_QUEUE add_queue = {};
+ int r;
+
+ add_queue.header.type = UMSCH_API_TYPE_SCHEDULER;
+ add_queue.header.opcode = UMSCH_API_ADD_QUEUE;
+ add_queue.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ add_queue.process_id = input_ptr->process_id;
+ add_queue.page_table_base_addr = input_ptr->page_table_base_addr;
+ add_queue.process_va_start = input_ptr->process_va_start;
+ add_queue.process_va_end = input_ptr->process_va_end;
+ add_queue.process_quantum = input_ptr->process_quantum;
+ add_queue.process_csa_addr = input_ptr->process_csa_addr;
+ add_queue.context_quantum = input_ptr->context_quantum;
+ add_queue.context_csa_addr = input_ptr->context_csa_addr;
+ add_queue.inprocess_context_priority = input_ptr->inprocess_context_priority;
+ add_queue.context_global_priority_level =
+ (enum UMSCH_AMD_PRIORITY_LEVEL)input_ptr->context_global_priority_level;
+ add_queue.doorbell_offset_0 = input_ptr->doorbell_offset_0;
+ add_queue.doorbell_offset_1 = input_ptr->doorbell_offset_1;
+ add_queue.affinity.u32All = input_ptr->affinity;
+ add_queue.mqd_addr = input_ptr->mqd_addr;
+ add_queue.engine_type = (enum UMSCH_ENGINE_TYPE)input_ptr->engine_type;
+ add_queue.h_context = input_ptr->h_context;
+ add_queue.h_queue = input_ptr->h_queue;
+ add_queue.vm_context_cntl = input_ptr->vm_context_cntl;
+ add_queue.is_context_suspended = input_ptr->is_context_suspended;
+ add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0;
+
+ add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &add_queue.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS);
+ if (r)
+ return r;
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH ADD_QUEUE: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_remove_queue(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_remove_queue_input *input_ptr)
+{
+ union UMSCHAPI__REMOVE_QUEUE remove_queue = {};
+ struct amdgpu_device *adev = umsch->ring.adev;
+ int r;
+
+ remove_queue.header.type = UMSCH_API_TYPE_SCHEDULER;
+ remove_queue.header.opcode = UMSCH_API_REMOVE_QUEUE;
+ remove_queue.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ remove_queue.doorbell_offset_0 = input_ptr->doorbell_offset_0;
+ remove_queue.doorbell_offset_1 = input_ptr->doorbell_offset_1;
+ remove_queue.context_csa_addr = input_ptr->context_csa_addr;
+
+ remove_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ remove_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &remove_queue.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS);
+ if (r)
+ return r;
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH REMOVE_QUEUE: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_set_regs(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm);
+
+ umsch->rb_wptr = SOC15_REG_OFFSET(VCN, 0, regVCN_UMSCH_RB_WPTR);
+ umsch->rb_rptr = SOC15_REG_OFFSET(VCN, 0, regVCN_UMSCH_RB_RPTR);
+
+ return 0;
+}
+
+static const struct umsch_mm_funcs umsch_mm_v4_0_funcs = {
+ .set_hw_resources = umsch_mm_v4_0_set_hw_resources,
+ .add_queue = umsch_mm_v4_0_add_queue,
+ .remove_queue = umsch_mm_v4_0_remove_queue,
+ .set_regs = umsch_mm_v4_0_set_regs,
+ .init_microcode = amdgpu_umsch_mm_init_microcode,
+ .load_microcode = umsch_mm_v4_0_load_microcode,
+ .ring_init = amdgpu_umsch_mm_ring_init,
+ .ring_start = umsch_mm_v4_0_ring_start,
+ .ring_stop = umsch_mm_v4_0_ring_stop,
+};
+
+void umsch_mm_v4_0_set_funcs(struct amdgpu_umsch_mm *umsch)
+{
+ umsch->funcs = &umsch_mm_v4_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h
new file mode 100644
index 000000000000..06bc0fa74996
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UMSCH_MM_V4_0_H__
+#define __UMSCH_MM_V4_0_H__
+
+void umsch_mm_v4_0_set_funcs(struct amdgpu_umsch_mm *umsch);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index 10ecae257b18..2e79a3afc774 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -98,7 +98,7 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring,
}
/**
- * uvd_v3_1_ring_emit_fence - emit an fence & trap command
+ * uvd_v3_1_ring_emit_fence - emit a fence & trap command
*
* @ring: amdgpu_ring pointer
* @addr: address
@@ -242,7 +242,7 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
uint64_t addr;
uint32_t size;
- /* programm the VCPU memory controller bits 0-27 */
+ /* program the VCPU memory controller bits 0-27 */
addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
@@ -340,7 +340,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev)
/* enable VCPU clock */
WREG32(mmUVD_VCPU_CNTL, 1 << 9);
- /* disable interupt */
+ /* disable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
#ifdef __BIG_ENDIAN
@@ -405,7 +405,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev)
return r;
}
- /* enable interupt */
+ /* enable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1));
WREG32_P(mmUVD_STATUS, 0, ~(1<<2));
@@ -416,7 +416,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev)
/* Set the write pointer delay */
WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
- /* programm the 4GB memory segment for rptr and ring buffer */
+ /* Program the 4GB memory segment for rptr and ring buffer */
WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
(0x7 << 16) | (0x1 << 31));
@@ -531,9 +531,9 @@ static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev)
}
-static int uvd_v3_1_early_init(void *handle)
+static int uvd_v3_1_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v3_1_set_ring_funcs(adev);
@@ -542,10 +542,10 @@ static int uvd_v3_1_early_init(void *handle)
return 0;
}
-static int uvd_v3_1_sw_init(void *handle)
+static int uvd_v3_1_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
void *ptr;
uint32_t ucode_len;
@@ -562,7 +562,7 @@ static int uvd_v3_1_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
@@ -577,15 +577,13 @@ static int uvd_v3_1_sw_init(void *handle)
ptr += ucode_len;
memcpy(&adev->uvd.keyselect, ptr, 4);
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v3_1_sw_fini(void *handle)
+static int uvd_v3_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -623,18 +621,42 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev,
/**
* uvd_v3_1_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing.
*
- * Initialize the hardware, boot up the VCPU and do some testing
+ * On SI, the UVD is meant to be used in a specific power state,
+ * or alternatively the driver can manually enable its clock.
+ * In amdgpu we use the dedicated UVD power state when DPM is enabled.
+ * Calling amdgpu_dpm_enable_uvd makes DPM select the UVD power state
+ * for the SMU and afterwards enables the UVD clock.
+ * This is automatically done by amdgpu_uvd_ring_begin_use when work
+ * is submitted to the UVD ring. Here, we have to call it manually
+ * in order to power up UVD before firmware validation.
+ *
+ * Note that we must not disable the UVD clock here, as that would
+ * cause the ring test to fail. However, UVD is powered off
+ * automatically after the ring test: amdgpu_uvd_ring_end_use calls
+ * the UVD idle work handler which will disable the UVD clock when
+ * all fences are signalled.
*/
-static int uvd_v3_1_hw_init(void *handle)
+static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
uvd_v3_1_mc_resume(adev);
+ uvd_v3_1_enable_mgcg(adev, true);
+
+ /* Make sure UVD is powered during FW validation.
+ * It's going to be automatically powered off after the ring test.
+ */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+ else
+ amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
r = uvd_v3_1_fw_validate(adev);
if (r) {
@@ -642,9 +664,6 @@ static int uvd_v3_1_hw_init(void *handle)
return r;
}
- uvd_v3_1_enable_mgcg(adev, true);
- amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-
uvd_v3_1_start(adev);
r = amdgpu_ring_test_helper(ring);
@@ -690,13 +709,15 @@ done:
/**
* uvd_v3_1_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v3_1_hw_fini(void *handle)
+static int uvd_v3_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
if (RREG32(mmUVD_STATUS) != 0)
uvd_v3_1_stop(adev);
@@ -704,41 +725,71 @@ static int uvd_v3_1_hw_fini(void *handle)
return 0;
}
-static int uvd_v3_1_suspend(void *handle)
+static int uvd_v3_1_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v3_1_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
- r = uvd_v3_1_hw_fini(adev);
+ r = uvd_v3_1_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v3_1_resume(void *handle)
+static int uvd_v3_1_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v3_1_hw_init(adev);
+ return uvd_v3_1_hw_init(ip_block);
}
-static bool uvd_v3_1_is_idle(void *handle)
+static bool uvd_v3_1_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v3_1_wait_for_idle(void *handle)
+static int uvd_v3_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -747,9 +798,9 @@ static int uvd_v3_1_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v3_1_soft_reset(void *handle)
+static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v3_1_stop(adev);
@@ -760,13 +811,13 @@ static int uvd_v3_1_soft_reset(void *handle)
return uvd_v3_1_start(adev);
}
-static int uvd_v3_1_set_clockgating_state(void *handle,
+static int uvd_v3_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v3_1_set_powergating_state(void *handle,
+static int uvd_v3_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -775,11 +826,11 @@ static int uvd_v3_1_set_powergating_state(void *handle,
static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.name = "uvd_v3_1",
.early_init = uvd_v3_1_early_init,
- .late_init = NULL,
.sw_init = uvd_v3_1_sw_init,
.sw_fini = uvd_v3_1_sw_fini,
.hw_init = uvd_v3_1_hw_init,
.hw_fini = uvd_v3_1_hw_fini,
+ .prepare_suspend = uvd_v3_1_prepare_suspend,
.suspend = uvd_v3_1_suspend,
.resume = uvd_v3_1_resume,
.is_idle = uvd_v3_1_is_idle,
@@ -789,8 +840,7 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.set_powergating_state = uvd_v3_1_set_powergating_state,
};
-const struct amdgpu_ip_block_version uvd_v3_1_ip_block =
-{
+const struct amdgpu_ip_block_version uvd_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_UVD,
.major = 3,
.minor = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index a70d2a0de316..4b96fd583772 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -44,7 +44,7 @@ static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v4_2_start(struct amdgpu_device *adev);
static void uvd_v4_2_stop(struct amdgpu_device *adev);
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
bool sw_mode);
@@ -90,9 +90,9 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
-static int uvd_v4_2_early_init(void *handle)
+static int uvd_v4_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v4_2_set_ring_funcs(adev);
@@ -101,10 +101,10 @@ static int uvd_v4_2_early_init(void *handle)
return 0;
}
-static int uvd_v4_2_sw_init(void *handle)
+static int uvd_v4_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/* UVD TRAP */
@@ -119,7 +119,7 @@ static int uvd_v4_2_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
@@ -127,15 +127,13 @@ static int uvd_v4_2_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v4_2_sw_fini(void *handle)
+static int uvd_v4_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -149,13 +147,13 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
/**
* uvd_v4_2_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v4_2_hw_init(void *handle)
+static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -204,13 +202,15 @@ done:
/**
* uvd_v4_2_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v4_2_hw_fini(void *handle)
+static int uvd_v4_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
if (RREG32(mmUVD_STATUS) != 0)
uvd_v4_2_stop(adev);
@@ -218,28 +218,58 @@ static int uvd_v4_2_hw_fini(void *handle)
return 0;
}
-static int uvd_v4_2_suspend(void *handle)
+static int uvd_v4_2_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v4_2_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
- r = uvd_v4_2_hw_fini(adev);
+ r = uvd_v4_2_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v4_2_resume(void *handle)
+static int uvd_v4_2_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v4_2_hw_init(adev);
+ return uvd_v4_2_hw_init(ip_block);
}
/**
@@ -272,7 +302,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
/* enable VCPU clock */
WREG32(mmUVD_VCPU_CNTL, 1 << 9);
- /* disable interupt */
+ /* disable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
#ifdef __BIG_ENDIAN
@@ -282,6 +312,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
#endif
WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
/* initialize UVD memory controller */
WREG32(mmUVD_LMI_CTRL, 0x203108);
@@ -628,17 +659,17 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2);
}
-static bool uvd_v4_2_is_idle(void *handle)
+static bool uvd_v4_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v4_2_wait_for_idle(void *handle)
+static int uvd_v4_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -647,9 +678,9 @@ static int uvd_v4_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v4_2_soft_reset(void *handle)
+static int uvd_v4_2_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v4_2_stop(adev);
@@ -678,13 +709,13 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v4_2_set_powergating_state(void *handle,
+static int uvd_v4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -694,7 +725,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE) {
uvd_v4_2_stop(adev);
@@ -725,11 +756,11 @@ static int uvd_v4_2_set_powergating_state(void *handle,
static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
.name = "uvd_v4_2",
.early_init = uvd_v4_2_early_init,
- .late_init = NULL,
.sw_init = uvd_v4_2_sw_init,
.sw_fini = uvd_v4_2_sw_fini,
.hw_init = uvd_v4_2_hw_init,
.hw_fini = uvd_v4_2_hw_fini,
+ .prepare_suspend = uvd_v4_2_prepare_suspend,
.suspend = uvd_v4_2_suspend,
.resume = uvd_v4_2_resume,
.is_idle = uvd_v4_2_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index f3b0a927101b..71409ad8b7ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -42,7 +42,7 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v5_0_start(struct amdgpu_device *adev);
static void uvd_v5_0_stop(struct amdgpu_device *adev);
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -88,9 +88,9 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
-static int uvd_v5_0_early_init(void *handle)
+static int uvd_v5_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v5_0_set_ring_funcs(adev);
@@ -99,10 +99,10 @@ static int uvd_v5_0_early_init(void *handle)
return 0;
}
-static int uvd_v5_0_sw_init(void *handle)
+static int uvd_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/* UVD TRAP */
@@ -117,7 +117,7 @@ static int uvd_v5_0_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
@@ -125,15 +125,13 @@ static int uvd_v5_0_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v5_0_sw_fini(void *handle)
+static int uvd_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -145,19 +143,19 @@ static int uvd_v5_0_sw_fini(void *handle)
/**
* uvd_v5_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v5_0_hw_init(void *handle)
+static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v5_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v5_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -202,13 +200,15 @@ done:
/**
* uvd_v5_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v5_0_hw_fini(void *handle)
+static int uvd_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
if (RREG32(mmUVD_STATUS) != 0)
uvd_v5_0_stop(adev);
@@ -216,29 +216,58 @@ static int uvd_v5_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v5_0_suspend(void *handle)
+static int uvd_v5_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v5_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
- r = uvd_v5_0_hw_fini(adev);
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = uvd_v5_0_hw_fini(ip_block);
if (r)
return r;
- uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v5_0_resume(void *handle)
+static int uvd_v5_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v5_0_hw_init(adev);
+ return uvd_v5_0_hw_init(ip_block);
}
/**
@@ -551,17 +580,17 @@ static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool uvd_v5_0_is_idle(void *handle)
+static bool uvd_v5_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v5_0_wait_for_idle(void *handle)
+static int uvd_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -570,9 +599,9 @@ static int uvd_v5_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v5_0_soft_reset(void *handle)
+static int uvd_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v5_0_stop(adev);
@@ -761,15 +790,15 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (uvd_v5_0_wait_for_idle(handle))
+ if (uvd_v5_0_wait_for_idle(ip_block))
return -EBUSY;
uvd_v5_0_enable_clock_gating(adev, true);
@@ -783,7 +812,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v5_0_set_powergating_state(void *handle,
+static int uvd_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -793,7 +822,7 @@ static int uvd_v5_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
@@ -808,9 +837,9 @@ out:
return ret;
}
-static void uvd_v5_0_get_clockgating_state(void *handle, u32 *flags)
+static void uvd_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -833,11 +862,11 @@ out:
static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
.name = "uvd_v5_0",
.early_init = uvd_v5_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v5_0_sw_init,
.sw_fini = uvd_v5_0_sw_fini,
.hw_init = uvd_v5_0_hw_init,
.hw_fini = uvd_v5_0_hw_fini,
+ .prepare_suspend = uvd_v5_0_prepare_suspend,
.suspend = uvd_v5_0_suspend,
.resume = uvd_v5_0_resume,
.is_idle = uvd_v5_0_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 760859880c1e..ceb94bbb03a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -48,7 +48,7 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v6_0_start(struct amdgpu_device *adev);
static void uvd_v6_0_stop(struct amdgpu_device *adev);
static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev);
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -216,8 +216,9 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -280,8 +281,9 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -332,15 +334,9 @@ err:
static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
- struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
long r;
- r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &bo, NULL, NULL);
- if (r)
- return r;
-
r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
@@ -357,14 +353,12 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
return r;
}
-static int uvd_v6_0_early_init(void *handle)
+static int uvd_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
if (!(adev->flags & AMD_IS_APU) &&
@@ -383,11 +377,11 @@ static int uvd_v6_0_early_init(void *handle)
return 0;
}
-static int uvd_v6_0_sw_init(void *handle)
+static int uvd_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* UVD TRAP */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
@@ -420,7 +414,7 @@ static int uvd_v6_0_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
@@ -434,21 +428,19 @@ static int uvd_v6_0_sw_init(void *handle)
sprintf(ring->name, "uvd_enc%d", i);
r = amdgpu_ring_init(adev, ring, 512,
&adev->uvd.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
}
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v6_0_sw_fini(void *handle)
+static int uvd_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -465,19 +457,19 @@ static int uvd_v6_0_sw_fini(void *handle)
/**
* uvd_v6_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v6_0_hw_init(void *handle)
+static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int i, r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v6_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v6_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -534,13 +526,15 @@ done:
/**
* uvd_v6_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v6_0_hw_fini(void *handle)
+static int uvd_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
if (RREG32(mmUVD_STATUS) != 0)
uvd_v6_0_stop(adev);
@@ -548,28 +542,58 @@ static int uvd_v6_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v6_0_suspend(void *handle)
+static int uvd_v6_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
- r = uvd_v6_0_hw_fini(adev);
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = uvd_v6_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v6_0_resume(void *handle)
+static int uvd_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v6_0_hw_init(adev);
+ return uvd_v6_0_hw_init(ip_block);
}
/**
@@ -1121,29 +1145,29 @@ static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, vmid);
}
-static bool uvd_v6_0_is_idle(void *handle)
+static bool uvd_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v6_0_wait_for_idle(void *handle)
+static int uvd_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (uvd_v6_0_is_idle(handle))
+ if (uvd_v6_0_is_idle(ip_block))
return 0;
}
return -ETIMEDOUT;
}
#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
-static bool uvd_v6_0_check_soft_reset(void *handle)
+static bool uvd_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1161,9 +1185,9 @@ static bool uvd_v6_0_check_soft_reset(void *handle)
}
}
-static int uvd_v6_0_pre_soft_reset(void *handle)
+static int uvd_v6_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->uvd.inst->srbm_soft_reset)
return 0;
@@ -1172,9 +1196,9 @@ static int uvd_v6_0_pre_soft_reset(void *handle)
return 0;
}
-static int uvd_v6_0_soft_reset(void *handle)
+static int uvd_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->uvd.inst->srbm_soft_reset)
@@ -1203,9 +1227,9 @@ static int uvd_v6_0_soft_reset(void *handle)
return 0;
}
-static int uvd_v6_0_post_soft_reset(void *handle)
+static int uvd_v6_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->uvd.inst->srbm_soft_reset)
return 0;
@@ -1428,15 +1452,15 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (uvd_v6_0_wait_for_idle(handle))
+ if (uvd_v6_0_wait_for_idle(ip_block))
return -EBUSY;
uvd_v6_0_enable_clock_gating(adev, true);
/* enable HW gates because UVD is idle */
@@ -1449,7 +1473,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v6_0_set_powergating_state(void *handle,
+static int uvd_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -1459,7 +1483,7 @@ static int uvd_v6_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
@@ -1476,9 +1500,9 @@ out:
return ret;
}
-static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags)
+static void uvd_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -1505,11 +1529,11 @@ out:
static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
.name = "uvd_v6_0",
.early_init = uvd_v6_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v6_0_sw_init,
.sw_fini = uvd_v6_0_sw_fini,
.hw_init = uvd_v6_0_hw_init,
.hw_fini = uvd_v6_0_hw_fini,
+ .prepare_suspend = uvd_v6_0_prepare_suspend,
.suspend = uvd_v6_0_suspend,
.resume = uvd_v6_0_resume,
.is_idle = uvd_v6_0_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 7cd67cb2ac5f..1f8866f3f63c 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_uvd.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
@@ -117,7 +118,7 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
@@ -152,7 +153,7 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
return;
}
@@ -212,7 +213,7 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
*
* Open up a stream for HW test
*/
-static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle,
struct amdgpu_bo *bo,
struct dma_fence **fence)
{
@@ -223,8 +224,9 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -275,7 +277,7 @@ err:
*
* Close up a stream for HW test or if userspace failed to do so
*/
-static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle,
struct amdgpu_bo *bo,
struct dma_fence **fence)
{
@@ -286,8 +288,9 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -338,15 +341,9 @@ err:
static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
- struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
long r;
- r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &bo, NULL, NULL);
- if (r)
- return r;
-
r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
@@ -363,14 +360,12 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
return r;
}
-static int uvd_v7_0_early_init(void *handle)
+static int uvd_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->asic_type == CHIP_VEGA20) {
u32 harvest;
@@ -402,12 +397,12 @@ static int uvd_v7_0_early_init(void *handle)
return 0;
}
-static int uvd_v7_0_sw_init(void *handle)
+static int uvd_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
if (adev->uvd.harvest_config & (1 << j))
@@ -451,16 +446,18 @@ static int uvd_v7_0_sw_init(void *handle)
continue;
if (!amdgpu_sriov_vf(adev)) {
ring = &adev->uvd.inst[j].ring;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "uvd_%d", ring->me);
r = amdgpu_ring_init(adev, ring, 512,
&adev->uvd.inst[j].irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst[j].ring_enc[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i);
if (amdgpu_sriov_vf(adev)) {
ring->use_doorbell = true;
@@ -475,7 +472,7 @@ static int uvd_v7_0_sw_init(void *handle)
}
r = amdgpu_ring_init(adev, ring, 512,
&adev->uvd.inst[j].irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -485,10 +482,6 @@ static int uvd_v7_0_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
- if (r)
- return r;
-
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
@@ -496,10 +489,10 @@ static int uvd_v7_0_sw_init(void *handle)
return r;
}
-static int uvd_v7_0_sw_fini(void *handle)
+static int uvd_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_virt_free_mm_table(adev);
@@ -519,13 +512,13 @@ static int uvd_v7_0_sw_fini(void *handle)
/**
* uvd_v7_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v7_0_hw_init(void *handle)
+static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
uint32_t tmp;
int i, j, r;
@@ -597,13 +590,15 @@ done:
/**
* uvd_v7_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v7_0_hw_fini(void *handle)
+static int uvd_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
if (!amdgpu_sriov_vf(adev))
uvd_v7_0_stop(adev);
@@ -615,28 +610,58 @@ static int uvd_v7_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v7_0_suspend(void *handle)
+static int uvd_v7_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
- r = uvd_v7_0_hw_fini(adev);
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = uvd_v7_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v7_0_resume(void *handle)
+static int uvd_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v7_0_hw_init(adev);
+ return uvd_v7_0_hw_init(ip_block);
}
/**
@@ -658,11 +683,11 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
i == 0 ?
- adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo:
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo :
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_lo);
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
i == 0 ?
- adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi:
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi :
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_hi);
WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
offset = 0;
@@ -735,7 +760,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
- adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
+ *adev->uvd.inst[i].ring_enc[0].wptr_cpu_addr = 0;
adev->uvd.inst[i].ring_enc[0].wptr = 0;
adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
}
@@ -1257,14 +1282,15 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
* uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission.
*
* @p: the CS parser with the IBs
- * @ib_idx: which IB to patch
+ * @job: which job this ib is in
+ * @ib: which IB to patch
*
*/
static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
- uint32_t ib_idx)
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
- struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
unsigned i;
/* No patching necessary for the first instance */
@@ -1272,12 +1298,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
return 0;
for (i = 0; i < ib->length_dw; i += 2) {
- uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
+ uint32_t reg = amdgpu_ib_get_value(ib, i);
reg -= p->adev->reg_offset[UVD_HWIP][0][1];
reg += p->adev->reg_offset[UVD_HWIP][1][1];
- amdgpu_set_ib_value(p, ib_idx, i, reg);
+ amdgpu_ib_set_value(ib, i, reg);
}
return 0;
}
@@ -1377,7 +1403,7 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1420,7 +1446,7 @@ static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1438,104 +1464,6 @@ static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-#if 0
-static bool uvd_v7_0_is_idle(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
-}
-
-static int uvd_v7_0_wait_for_idle(void *handle)
-{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->usec_timeout; i++) {
- if (uvd_v7_0_is_idle(handle))
- return 0;
- }
- return -ETIMEDOUT;
-}
-
-#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
-static bool uvd_v7_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset = 0;
- u32 tmp = RREG32(mmSRBM_STATUS);
-
- if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
- REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
- (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
- AMDGPU_UVD_STATUS_BUSY_MASK))
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
- SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
-
- if (srbm_soft_reset) {
- adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
- return true;
- } else {
- adev->uvd.inst[ring->me].srbm_soft_reset = 0;
- return false;
- }
-}
-
-static int uvd_v7_0_pre_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
-
- uvd_v7_0_stop(adev);
- return 0;
-}
-
-static int uvd_v7_0_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
- srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
-
- if (srbm_soft_reset) {
- u32 tmp;
-
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
-
- return 0;
-}
-
-static int uvd_v7_0_post_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return uvd_v7_0_start(adev);
-}
-#endif
-
static int uvd_v7_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1585,172 +1513,7 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-#if 0
-static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
-{
- uint32_t data, data1, data2, suvd_flags;
-
- data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
- data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
- data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
-
- data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
- UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
-
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
-
- data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
- (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
- (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
-
- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
- UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
- UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
- UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
- UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
- UVD_CGC_CTRL__SYS_MODE_MASK |
- UVD_CGC_CTRL__UDEC_MODE_MASK |
- UVD_CGC_CTRL__MPEG2_MODE_MASK |
- UVD_CGC_CTRL__REGS_MODE_MASK |
- UVD_CGC_CTRL__RBC_MODE_MASK |
- UVD_CGC_CTRL__LMI_MC_MODE_MASK |
- UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
- UVD_CGC_CTRL__IDCT_MODE_MASK |
- UVD_CGC_CTRL__MPRD_MODE_MASK |
- UVD_CGC_CTRL__MPC_MODE_MASK |
- UVD_CGC_CTRL__LBSI_MODE_MASK |
- UVD_CGC_CTRL__LRBBM_MODE_MASK |
- UVD_CGC_CTRL__WCB_MODE_MASK |
- UVD_CGC_CTRL__VCPU_MODE_MASK |
- UVD_CGC_CTRL__JPEG_MODE_MASK |
- UVD_CGC_CTRL__JPEG2_MODE_MASK |
- UVD_CGC_CTRL__SCPU_MODE_MASK);
- data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
- data1 |= suvd_flags;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
-}
-
-static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
-{
- uint32_t data, data1, cgc_flags, suvd_flags;
-
- data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
- data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
-
- cgc_flags = UVD_CGC_GATE__SYS_MASK |
- UVD_CGC_GATE__UDEC_MASK |
- UVD_CGC_GATE__MPEG2_MASK |
- UVD_CGC_GATE__RBC_MASK |
- UVD_CGC_GATE__LMI_MC_MASK |
- UVD_CGC_GATE__IDCT_MASK |
- UVD_CGC_GATE__MPRD_MASK |
- UVD_CGC_GATE__MPC_MASK |
- UVD_CGC_GATE__LBSI_MASK |
- UVD_CGC_GATE__LRBBM_MASK |
- UVD_CGC_GATE__UDEC_RE_MASK |
- UVD_CGC_GATE__UDEC_CM_MASK |
- UVD_CGC_GATE__UDEC_IT_MASK |
- UVD_CGC_GATE__UDEC_DB_MASK |
- UVD_CGC_GATE__UDEC_MP_MASK |
- UVD_CGC_GATE__WCB_MASK |
- UVD_CGC_GATE__VCPU_MASK |
- UVD_CGC_GATE__SCPU_MASK |
- UVD_CGC_GATE__JPEG_MASK |
- UVD_CGC_GATE__JPEG2_MASK;
-
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
-
- data |= cgc_flags;
- data1 |= suvd_flags;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
-}
-
-static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
- else
- tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-
-static int uvd_v7_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE);
-
- uvd_v7_0_set_bypass_mode(adev, enable);
-
- if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
- return 0;
-
- if (enable) {
- /* disable HW gating and enable Sw gating */
- uvd_v7_0_set_sw_clock_gating(adev);
- } else {
- /* wait for STATUS to clear */
- if (uvd_v7_0_wait_for_idle(handle))
- return -EBUSY;
-
- /* enable HW gates because UVD is idle */
- /* uvd_v7_0_set_hw_clock_gating(adev); */
- }
-
- return 0;
-}
-
-static int uvd_v7_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- /* This doesn't actually powergate the UVD block.
- * That's done in the dpm code via the SMC. This
- * just re-inits the block as necessary. The actual
- * gating still happens in the dpm code. We should
- * revisit this when there is a cleaner line between
- * the smc and the hw blocks
- */
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
- return 0;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
-
- if (state == AMD_PG_STATE_GATE) {
- uvd_v7_0_stop(adev);
- return 0;
- } else {
- return uvd_v7_0_start(adev);
- }
-}
-#endif
-
-static int uvd_v7_0_set_clockgating_state(void *handle,
+static int uvd_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
@@ -1760,19 +1523,13 @@ static int uvd_v7_0_set_clockgating_state(void *handle,
const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
.name = "uvd_v7_0",
.early_init = uvd_v7_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v7_0_sw_init,
.sw_fini = uvd_v7_0_sw_fini,
.hw_init = uvd_v7_0_hw_init,
.hw_fini = uvd_v7_0_hw_fini,
+ .prepare_suspend = uvd_v7_0_prepare_suspend,
.suspend = uvd_v7_0_suspend,
.resume = uvd_v7_0_resume,
- .is_idle = NULL /* uvd_v7_0_is_idle */,
- .wait_for_idle = NULL /* uvd_v7_0_wait_for_idle */,
- .check_soft_reset = NULL /* uvd_v7_0_check_soft_reset */,
- .pre_soft_reset = NULL /* uvd_v7_0_pre_soft_reset */,
- .soft_reset = NULL /* uvd_v7_0_soft_reset */,
- .post_soft_reset = NULL /* uvd_v7_0_post_soft_reset */,
.set_clockgating_state = uvd_v7_0_set_clockgating_state,
.set_powergating_state = NULL /* uvd_v7_0_set_powergating_state */,
};
@@ -1782,7 +1539,6 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
.set_wptr = uvd_v7_0_ring_set_wptr,
@@ -1815,7 +1571,6 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
.set_wptr = uvd_v7_0_enc_ring_set_wptr,
@@ -1888,8 +1643,7 @@ static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
}
}
-const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
-{
+const struct amdgpu_ip_block_version uvd_v7_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_UVD,
.major = 7,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c
deleted file mode 100644
index d64d681a05dc..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#include "amdgpu.h"
-#include "nv.h"
-
-#include "soc15_common.h"
-#include "soc15_hw_ip.h"
-#include "vangogh_ip_offset.h"
-
-void vangogh_reg_base_init(struct amdgpu_device *adev)
-{
- /* HW has more IP blocks, only initialized the blocke needed by driver */
- uint32_t i;
- for (i = 0 ; i < MAX_INSTANCE ; ++i) {
- adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
- adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
- adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
- adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
- adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
- adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
- adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
- adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
- adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i]));
- adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
- adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
- adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
- adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
- }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
new file mode 100644
index 000000000000..9ae424618556
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
@@ -0,0 +1,839 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * Copyright 2025 Valve Corporation
+ * Copyright 2025 Alexandre Demers
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ * Timur Kristóf <timur.kristof@gmail.com>
+ * Alexandre Demers <alexandre.f.demers@gmail.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_gart.h"
+#include "sid.h"
+#include "vce_v1_0.h"
+#include "vce/vce_1_0_d.h"
+#include "vce/vce_1_0_sh_mask.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+
+#define VCE_V1_0_FW_SIZE (256 * 1024)
+#define VCE_V1_0_STACK_SIZE (64 * 1024)
+#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
+#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
+
+#define VCE_V1_0_GART_PAGE_START \
+ (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)
+#define VCE_V1_0_GART_ADDR_START \
+ (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)
+
+static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
+static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+
+struct vce_v1_0_fw_signature {
+ int32_t offset;
+ uint32_t length;
+ int32_t number;
+ struct {
+ uint32_t chip_id;
+ uint32_t keyselect;
+ uint32_t nonce[4];
+ uint32_t sigval[4];
+ } val[8];
+};
+
+/**
+ * vce_v1_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vce_v1_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(mmVCE_RB_RPTR);
+ else
+ return RREG32(mmVCE_RB_RPTR2);
+}
+
+/**
+ * vce_v1_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vce_v1_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(mmVCE_RB_WPTR);
+ else
+ return RREG32(mmVCE_RB_WPTR2);
+}
+
+/**
+ * vce_v1_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vce_v1_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ else
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+}
+
+static int vce_v1_0_lmi_clean(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ if (RREG32(mmVCE_LMI_STATUS) & 0x337f)
+ return 0;
+
+ mdelay(10);
+ }
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int vce_v1_0_firmware_loaded(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ if (RREG32(mmVCE_STATUS) & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
+ return 0;
+ mdelay(10);
+ }
+
+ dev_err(adev->dev, "VCE not responding, trying to reset the ECPU\n");
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(mmVCE_SOFT_RESET, 0,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static void vce_v1_0_init_cg(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_CLOCK_GATING_B);
+ tmp |= 0x1e;
+ tmp &= ~0xe100e1;
+ WREG32(mmVCE_CLOCK_GATING_B, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp &= ~0xff9ff000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+}
+
+/**
+ * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The VCE1 firmware validation mechanism needs a firmware signature.
+ * This function finds the signature appropriate for the current
+ * ASIC and writes that into the VCPU BO.
+ */
+static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *hdr;
+ struct vce_v1_0_fw_signature *sign;
+ unsigned int ucode_offset;
+ uint32_t chip_id;
+ u32 *cpu_addr;
+ int i;
+
+ hdr = (const struct common_firmware_header *)adev->vce.fw->data;
+ ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ cpu_addr = adev->vce.cpu_addr;
+
+ sign = (void *)adev->vce.fw->data + ucode_offset;
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ chip_id = 0x01000014;
+ break;
+ case CHIP_VERDE:
+ chip_id = 0x01000015;
+ break;
+ case CHIP_PITCAIRN:
+ chip_id = 0x01000016;
+ break;
+ default:
+ dev_err(adev->dev, "asic_type %#010x was not found!", adev->asic_type);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < le32_to_cpu(sign->number); ++i) {
+ if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
+ break;
+ }
+
+ if (i == le32_to_cpu(sign->number)) {
+ dev_err(adev->dev, "chip_id 0x%x for %s was not found in VCE firmware",
+ chip_id, amdgpu_asic_name[adev->asic_type]);
+ return -EINVAL;
+ }
+
+ cpu_addr += (256 - 64) / 4;
+ memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16);
+ cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64);
+
+ memset_io(&cpu_addr[5], 0, 44);
+ memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign));
+
+ cpu_addr += (le32_to_cpu(sign->length) + 64) / 4;
+ memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16);
+
+ adev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
+
+ return 0;
+}
+
+static int vce_v1_0_wait_for_fw_validation(struct amdgpu_device *adev)
+{
+ int i;
+
+ dev_dbg(adev->dev, "VCE keyselect: %d", adev->vce.keyselect);
+ WREG32(mmVCE_LMI_FW_START_KEYSEL, adev->vce.keyselect);
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)
+ break;
+ }
+
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)) {
+ dev_err(adev->dev, "VCE FW validation timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__PASS_MASK)) {
+ dev_err(adev->dev, "VCE FW validation failed\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK))
+ break;
+ }
+
+ if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK) {
+ dev_err(adev->dev, "VCE FW busy timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t offset;
+ uint32_t size;
+
+ /*
+ * When the keyselect is already set, don't perturb VCE FW.
+ * Validation seems to always fail the second time.
+ */
+ if (RREG32(mmVCE_LMI_FW_START_KEYSEL)) {
+ dev_dbg(adev->dev, "keyselect already set: 0x%x (on CPU: 0x%x)\n",
+ RREG32(mmVCE_LMI_FW_START_KEYSEL), adev->vce.keyselect);
+
+ WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+ return 0;
+ }
+
+ WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
+ WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+ WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+ WREG32(mmVCE_CLOCK_GATING_B, 0);
+
+ WREG32_P(mmVCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
+
+ WREG32(mmVCE_LMI_CTRL, 0x00398000);
+
+ WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+ WREG32(mmVCE_LMI_SWAP_CNTL, 0);
+ WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
+ WREG32(mmVCE_LMI_VM_CTRL, 0);
+
+ WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES);
+
+ offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET;
+ size = VCE_V1_0_FW_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
+
+ offset += size;
+ size = VCE_V1_0_STACK_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
+
+ offset += size;
+ size = VCE_V1_0_DATA_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
+
+ WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+
+ return vce_v1_0_wait_for_fw_validation(adev);
+}
+
+/**
+ * vce_v1_0_is_idle() - Check idle status of VCE1 IP block
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ *
+ * Check whether VCE is busy according to VCE_STATUS.
+ * Also check whether the SRBM thinks VCE is busy, although
+ * SRBM_STATUS.VCE_BUSY seems to be bogus because it
+ * appears to mirror the VCE_STATUS.VCPU_REPORT_FW_LOADED bit.
+ */
+static bool vce_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool busy =
+ (RREG32(mmVCE_STATUS) & (VCE_STATUS__JOB_BUSY_MASK | VCE_STATUS__UENC_BUSY_MASK)) ||
+ (RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
+
+ return !busy;
+}
+
+static int vce_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ udelay(1);
+ if (vce_v1_0_is_idle(ip_block))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+/**
+ * vce_v1_0_start - start VCE block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the VCE block
+ */
+static int vce_v1_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ WREG32_P(mmVCE_STATUS, 1, ~1);
+
+ r = vce_v1_0_mc_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->vce.ring[0];
+ WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vce.ring[1];
+ WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO2, lower_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+
+ WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
+ ~VCE_VCPU_CNTL__CLK_EN_MASK);
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ mdelay(100);
+
+ WREG32_P(mmVCE_SOFT_RESET, 0,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ r = vce_v1_0_firmware_loaded(adev);
+
+ /* Clear VCE_STATUS, otherwise SRBM thinks VCE1 is busy. */
+ WREG32(mmVCE_STATUS, 0);
+
+ if (r) {
+ dev_err(adev->dev, "VCE not responding, giving up\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_stop(struct amdgpu_device *adev)
+{
+ struct amdgpu_ip_block *ip_block;
+ int status;
+ int i;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
+ if (!ip_block)
+ return -EINVAL;
+
+ if (vce_v1_0_lmi_clean(adev))
+ dev_warn(adev->dev, "VCE not idle\n");
+
+ if (vce_v1_0_wait_for_idle(ip_block))
+ dev_warn(adev->dev, "VCE busy: VCE_STATUS=0x%x, SRBM_STATUS2=0x%x\n",
+ RREG32(mmVCE_STATUS), RREG32(mmSRBM_STATUS2));
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8));
+
+ for (i = 0; i < 100; ++i) {
+ status = RREG32(mmVCE_LMI_STATUS);
+ if (status & 0x240)
+ break;
+ mdelay(1);
+ }
+
+ WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK);
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ WREG32(mmVCE_STATUS, 0);
+
+ return 0;
+}
+
+static void vce_v1_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp &= ~0x1ff000;
+ tmp |= 0xff800000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+ } else {
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp &= ~VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp |= 0x1ff000;
+ tmp &= ~0xff800000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp |= 0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+ }
+}
+
+static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
+
+ adev->vce.num_rings = 2;
+
+ vce_v1_0_set_ring_funcs(adev);
+ vce_v1_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Due to various hardware limitations, the VCE1 requires
+ * the VCPU BO to be in the low 32 bit address range.
+ * Ensure that the VCPU BO has a 32-bit GPU address,
+ * or return an error code when that isn't possible.
+ *
+ * To accomodate that, we put GART to the LOW address range
+ * and reserve some GART pages where we map the VCPU BO,
+ * so that it gets a 32-bit address.
+ */
+static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
+{
+ u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo);
+ u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
+ u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
+ u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
+ u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
+ u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
+
+ /*
+ * Check if the VCPU BO already has a 32-bit address.
+ * Eg. if MC is configured to put VRAM in the low address range.
+ */
+ if (gpu_addr <= max_vcpu_bo_addr)
+ return 0;
+
+ /* Check if we can map the VCPU BO in GART to a 32-bit address. */
+ if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr)
+ return -EINVAL;
+
+ amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
+ num_pages, flags, adev->gart.ptr);
+ adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START;
+ if (adev->vce.gpu_addr > max_vcpu_bo_addr)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_sw_init(adev, VCE_V1_0_FW_SIZE +
+ VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_load_fw_signature(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
+ ring = &adev->vce.ring[i];
+ sprintf(ring->name, "vce%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_sw_fini(adev);
+}
+
+/**
+ * vce_v1_0_hw_init - start and test VCE block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vce_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vce(adev, true);
+ else
+ amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
+ if (r)
+ return r;
+ }
+
+ dev_info(adev->dev, "VCE initialized successfully.\n");
+
+ return 0;
+}
+
+static int vce_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = vce_v1_0_stop(ip_block->adev);
+ if (r)
+ return r;
+
+ cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
+ return 0;
+}
+
+static int vce_v1_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = vce_v1_0_hw_fini(ip_block);
+ if (r) {
+ dev_err(adev->dev, "vce_v1_0_hw_fini() failed with error %i", r);
+ return r;
+ }
+
+ return amdgpu_vce_suspend(adev);
+}
+
+static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_load_fw_signature(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
+ if (r)
+ return r;
+
+ return vce_v1_0_hw_init(ip_block);
+}
+
+static int vce_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t val = 0;
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
+
+ WREG32_P(mmVCE_SYS_INT_EN, val,
+ ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+ return 0;
+}
+
+static int vce_v1_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg(adev->dev, "IH: VCE\n");
+ switch (entry->src_data[0]) {
+ case 0:
+ case 1:
+ amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
+ break;
+ default:
+ dev_err(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ vce_v1_0_init_cg(adev);
+ vce_v1_0_enable_mgcg(adev, state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static int vce_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * This doesn't actually powergate the VCE block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ if (state == AMD_PG_STATE_GATE)
+ return vce_v1_0_stop(adev);
+ else
+ return vce_v1_0_start(adev);
+}
+
+static const struct amd_ip_funcs vce_v1_0_ip_funcs = {
+ .name = "vce_v1_0",
+ .early_init = vce_v1_0_early_init,
+ .sw_init = vce_v1_0_sw_init,
+ .sw_fini = vce_v1_0_sw_fini,
+ .hw_init = vce_v1_0_hw_init,
+ .hw_fini = vce_v1_0_hw_fini,
+ .suspend = vce_v1_0_suspend,
+ .resume = vce_v1_0_resume,
+ .is_idle = vce_v1_0_is_idle,
+ .wait_for_idle = vce_v1_0_wait_for_idle,
+ .set_clockgating_state = vce_v1_0_set_clockgating_state,
+ .set_powergating_state = vce_v1_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs vce_v1_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_VCE,
+ .align_mask = 0xf,
+ .nop = VCE_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = vce_v1_0_ring_get_rptr,
+ .get_wptr = vce_v1_0_ring_get_wptr,
+ .set_wptr = vce_v1_0_ring_set_wptr,
+ .parse_cs = amdgpu_vce_ring_parse_cs,
+ .emit_frame_size = 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
+ .emit_ib = amdgpu_vce_ring_emit_ib,
+ .emit_fence = amdgpu_vce_ring_emit_fence,
+ .test_ring = amdgpu_vce_ring_test_ring,
+ .test_ib = amdgpu_vce_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vce_ring_begin_use,
+ .end_use = amdgpu_vce_ring_end_use,
+};
+
+static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ adev->vce.ring[i].funcs = &vce_v1_0_ring_funcs;
+ adev->vce.ring[i].me = i;
+ }
+};
+
+static const struct amdgpu_irq_src_funcs vce_v1_0_irq_funcs = {
+ .set = vce_v1_0_set_interrupt_state,
+ .process = vce_v1_0_process_interrupt,
+};
+
+static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vce.irq.num_types = 1;
+ adev->vce.irq.funcs = &vce_v1_0_irq_funcs;
+};
+
+const struct amdgpu_ip_block_version vce_v1_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCE,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vce_v1_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h
new file mode 100644
index 000000000000..206e7bec897f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * Copyright 2025 Valve Corporation
+ * Copyright 2025 Alexandre Demers
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCE_V1_0_H__
+#define __VCE_V1_0_H__
+
+extern const struct amdgpu_ip_block_version vce_v1_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 0e2945baf0f1..8ea8a6193492 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -201,20 +201,20 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
-static bool vce_v2_0_is_idle(void *handle)
+static bool vce_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
}
-static int vce_v2_0_wait_for_idle(void *handle)
+static int vce_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
unsigned i;
for (i = 0; i < adev->usec_timeout; i++) {
- if (vce_v2_0_is_idle(handle))
+ if (vce_v2_0_is_idle(ip_block))
return 0;
}
return -ETIMEDOUT;
@@ -274,15 +274,21 @@ static int vce_v2_0_start(struct amdgpu_device *adev)
static int vce_v2_0_stop(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
int i;
int status;
+
if (vce_v2_0_lmi_clean(adev)) {
- DRM_INFO("vce is not idle \n");
+ DRM_INFO("VCE is not idle \n");
return 0;
}
- if (vce_v2_0_wait_for_idle(adev)) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
+ if (!ip_block)
+ return -EINVAL;
+
+ if (vce_v2_0_wait_for_idle(ip_block)) {
DRM_INFO("VCE is busy, Can't set clock gating");
return 0;
}
@@ -398,9 +404,14 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable,
}
}
-static int vce_v2_0_early_init(void *handle)
+static int vce_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
adev->vce.num_rings = 2;
@@ -410,11 +421,11 @@ static int vce_v2_0_early_init(void *handle)
return 0;
}
-static int vce_v2_0_sw_init(void *handle)
+static int vce_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* VCE */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
@@ -431,24 +442,23 @@ static int vce_v2_0_sw_init(void *handle)
return r;
for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
ring = &adev->vce.ring[i];
sprintf(ring->name, "vce%d", i);
- r = amdgpu_ring_init(adev, ring, 512,
- &adev->vce.irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ hw_prio, NULL);
if (r)
return r;
}
- r = amdgpu_vce_entity_init(adev);
-
return r;
}
-static int vce_v2_0_sw_fini(void *handle)
+static int vce_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_vce_suspend(adev);
if (r)
@@ -457,10 +467,10 @@ static int vce_v2_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v2_0_hw_init(void *handle)
+static int vce_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
vce_v2_0_enable_mgcg(adev, true, false);
@@ -476,38 +486,63 @@ static int vce_v2_0_hw_init(void *handle)
return 0;
}
-static int vce_v2_0_hw_fini(void *handle)
+static int vce_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
+ cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
+
return 0;
}
-static int vce_v2_0_suspend(void *handle)
+static int vce_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
- r = vce_v2_0_hw_fini(adev);
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = vce_v2_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v2_0_resume(void *handle)
+static int vce_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vce_resume(adev);
+ r = amdgpu_vce_resume(ip_block->adev);
if (r)
return r;
- return vce_v2_0_hw_init(adev);
+ return vce_v2_0_hw_init(ip_block);
}
-static int vce_v2_0_soft_reset(void *handle)
+static int vce_v2_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
WREG32_FIELD(SRBM_SOFT_RESET, SOFT_RESET_VCE, 1);
mdelay(5);
@@ -548,13 +583,13 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v2_0_set_clockgating_state(void *handle,
+static int vce_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
bool sw_cg = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE) {
gate = true;
@@ -566,7 +601,7 @@ static int vce_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v2_0_set_powergating_state(void *handle,
+static int vce_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -576,7 +611,7 @@ static int vce_v2_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v2_0_stop(adev);
@@ -587,7 +622,6 @@ static int vce_v2_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs vce_v2_0_ip_funcs = {
.name = "vce_v2_0",
.early_init = vce_v2_0_early_init,
- .late_init = NULL,
.sw_init = vce_v2_0_sw_init,
.sw_fini = vce_v2_0_sw_fini,
.hw_init = vce_v2_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6d9108fa22e0..719e9643c43d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -64,8 +64,8 @@
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vce_v3_0_wait_for_idle(void *handle);
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
/**
* vce_v3_0_ring_get_rptr - get read pointer
@@ -396,9 +396,10 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
}
}
-static int vce_v3_0_early_init(void *handle)
+static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
@@ -407,6 +408,10 @@ static int vce_v3_0_early_init(void *handle)
(AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
return -ENOENT;
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
+
adev->vce.num_rings = 3;
vce_v3_0_set_ring_funcs(adev);
@@ -415,9 +420,9 @@ static int vce_v3_0_early_init(void *handle)
return 0;
}
-static int vce_v3_0_sw_init(void *handle)
+static int vce_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r, i;
@@ -440,23 +445,23 @@ static int vce_v3_0_sw_init(void *handle)
return r;
for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
ring = &adev->vce.ring[i];
sprintf(ring->name, "vce%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ hw_prio, NULL);
if (r)
return r;
}
- r = amdgpu_vce_entity_init(adev);
-
return r;
}
-static int vce_v3_0_sw_fini(void *handle)
+static int vce_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_vce_suspend(adev);
if (r)
@@ -465,10 +470,10 @@ static int vce_v3_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v3_0_hw_init(void *handle)
+static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vce_v3_0_override_vce_clock_gating(adev, true);
@@ -485,41 +490,65 @@ static int vce_v3_0_hw_init(void *handle)
return 0;
}
-static int vce_v3_0_hw_fini(void *handle)
+static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = vce_v3_0_wait_for_idle(handle);
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ r = vce_v3_0_wait_for_idle(ip_block);
if (r)
return r;
vce_v3_0_stop(adev);
- return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
+ return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE);
}
-static int vce_v3_0_suspend(void *handle)
+static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
- r = vce_v3_0_hw_fini(adev);
+ r = vce_v3_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v3_0_resume(void *handle)
+static int vce_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vce_resume(adev);
+ r = amdgpu_vce_resume(ip_block->adev);
if (r)
return r;
- return vce_v3_0_hw_init(adev);
+ return vce_v3_0_hw_init(ip_block);
}
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
@@ -573,9 +602,9 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
-static bool vce_v3_0_is_idle(void *handle)
+static bool vce_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 mask = 0;
mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
@@ -584,13 +613,13 @@ static bool vce_v3_0_is_idle(void *handle)
return !(RREG32(mmSRBM_STATUS2) & mask);
}
-static int vce_v3_0_wait_for_idle(void *handle)
+static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++)
- if (vce_v3_0_is_idle(handle))
+ if (vce_v3_0_is_idle(ip_block))
return 0;
return -ETIMEDOUT;
@@ -602,9 +631,9 @@ static int vce_v3_0_wait_for_idle(void *handle)
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
-static bool vce_v3_0_check_soft_reset(void *handle)
+static bool vce_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
/* According to VCE team , we should use VCE_STATUS instead
@@ -643,9 +672,9 @@ static bool vce_v3_0_check_soft_reset(void *handle)
}
}
-static int vce_v3_0_soft_reset(void *handle)
+static int vce_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->vce.srbm_soft_reset)
@@ -674,29 +703,29 @@ static int vce_v3_0_soft_reset(void *handle)
return 0;
}
-static int vce_v3_0_pre_soft_reset(void *handle)
+static int vce_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->vce.srbm_soft_reset)
return 0;
mdelay(5);
- return vce_v3_0_suspend(adev);
+ return vce_v3_0_suspend(ip_block);
}
-static int vce_v3_0_post_soft_reset(void *handle)
+static int vce_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->vce.srbm_soft_reset)
return 0;
mdelay(5);
- return vce_v3_0_resume(adev);
+ return vce_v3_0_resume(ip_block);
}
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -736,10 +765,10 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -777,7 +806,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v3_0_set_powergating_state(void *handle,
+static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -787,7 +816,7 @@ static int vce_v3_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
@@ -804,9 +833,9 @@ out:
return ret;
}
-static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
+static void vce_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -872,7 +901,6 @@ static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
.name = "vce_v3_0",
.early_init = vce_v3_0_early_init,
- .late_init = NULL,
.sw_init = vce_v3_0_sw_init,
.sw_fini = vce_v3_0_sw_fini,
.hw_init = vce_v3_0_hw_init,
@@ -923,7 +951,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,
- .parse_cs = amdgpu_vce_ring_parse_cs_vm,
+ .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
.emit_frame_size =
6 + /* vce_v3_0_emit_vm_flush */
4 + /* vce_v3_0_emit_pipeline_sync */
@@ -971,8 +999,7 @@ static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
};
-const struct amdgpu_ip_block_version vce_v3_0_ip_block =
-{
+const struct amdgpu_ip_block_version vce_v3_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 0,
@@ -980,8 +1007,7 @@ const struct amdgpu_ip_block_version vce_v3_0_ip_block =
.funcs = &vce_v3_0_ip_funcs,
};
-const struct amdgpu_ip_block_version vce_v3_1_ip_block =
-{
+const struct amdgpu_ip_block_version vce_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 1,
@@ -989,8 +1015,7 @@ const struct amdgpu_ip_block_version vce_v3_1_ip_block =
.funcs = &vce_v3_0_ip_funcs,
};
-const struct amdgpu_ip_block_version vce_v3_4_ip_block =
-{
+const struct amdgpu_ip_block_version vce_v3_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 4,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 37fa163393fd..2d64002bed61 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -25,6 +25,7 @@
*/
#include <linux/firmware.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
@@ -82,7 +83,7 @@ static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
if (ring->me == 0)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
@@ -105,7 +106,7 @@ static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
return;
}
@@ -176,7 +177,7 @@ static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
- adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
+ *adev->vce.ring[0].wptr_cpu_addr = 0;
adev->vce.ring[0].wptr = 0;
adev->vce.ring[0].wptr_old = 0;
@@ -406,9 +407,14 @@ static int vce_v4_0_stop(struct amdgpu_device *adev)
return 0;
}
-static int vce_v4_0_early_init(void *handle)
+static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
adev->vce.num_rings = 1;
@@ -421,9 +427,9 @@ static int vce_v4_0_early_init(void *handle)
return 0;
}
-static int vce_v4_0_sw_init(void *handle)
+static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
unsigned size;
@@ -462,7 +468,10 @@ static int vce_v4_0_sw_init(void *handle)
}
for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
ring = &adev->vce.ring[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vce%d", i);
if (amdgpu_sriov_vf(adev)) {
/* DOORBELL only works under SRIOV */
@@ -477,16 +486,11 @@ static int vce_v4_0_sw_init(void *handle)
ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
}
r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ hw_prio, NULL);
if (r)
return r;
}
-
- r = amdgpu_vce_entity_init(adev);
- if (r)
- return r;
-
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
@@ -494,10 +498,10 @@ static int vce_v4_0_sw_init(void *handle)
return r;
}
-static int vce_v4_0_sw_fini(void *handle)
+static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* free MM table */
amdgpu_virt_free_mm_table(adev);
@@ -514,10 +518,10 @@ static int vce_v4_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v4_0_hw_init(void *handle)
+static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
r = vce_v4_0_sriov_start(adev);
@@ -537,12 +541,14 @@ static int vce_v4_0_hw_init(void *handle)
return 0;
}
-static int vce_v4_0_hw_fini(void *handle)
+static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->vce.idle_work);
if (!amdgpu_sriov_vf(adev)) {
- /* vce_v4_0_wait_for_idle(handle); */
+ /* vce_v4_0_wait_for_idle(ip_block); */
vce_v4_0_stop(adev);
} else {
/* full access mode, so don't touch any VCE register */
@@ -552,48 +558,78 @@ static int vce_v4_0_hw_fini(void *handle)
return 0;
}
-static int vce_v4_0_suspend(void *handle)
+static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, idx;
if (adev->vce.vcpu_bo == NULL)
return 0;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
- void *ptr = adev->vce.cpu_addr;
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
+ void *ptr = adev->vce.cpu_addr;
- memcpy_fromio(adev->vce.saved_bo, ptr, size);
+ memcpy_fromio(adev->vce.saved_bo, ptr, size);
+ }
+ drm_dev_exit(idx);
+ }
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
}
- r = vce_v4_0_hw_fini(adev);
+ r = vce_v4_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v4_0_resume(void *handle)
+static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, idx;
if (adev->vce.vcpu_bo == NULL)
return -EINVAL;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
- void *ptr = adev->vce.cpu_addr;
- memcpy_toio(ptr, adev->vce.saved_bo, size);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
+ void *ptr = adev->vce.cpu_addr;
+
+ memcpy_toio(ptr, adev->vce.saved_bo, size);
+ drm_dev_exit(idx);
+ }
} else {
r = amdgpu_vce_resume(adev);
if (r)
return r;
}
- return vce_v4_0_hw_init(adev);
+ return vce_v4_0_hw_init(ip_block);
}
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
@@ -653,281 +689,14 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
-static int vce_v4_0_set_clockgating_state(void *handle,
+static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
return 0;
}
-#if 0
-static bool vce_v4_0_is_idle(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 mask = 0;
-
- mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
- mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
-
- return !(RREG32(mmSRBM_STATUS2) & mask);
-}
-
-static int vce_v4_0_wait_for_idle(void *handle)
-{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->usec_timeout; i++)
- if (vce_v4_0_is_idle(handle))
- return 0;
-
- return -ETIMEDOUT;
-}
-
-#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
-#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
-#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
-#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
- VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
-
-static bool vce_v4_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset = 0;
-
- /* According to VCE team , we should use VCE_STATUS instead
- * SRBM_STATUS.VCE_BUSY bit for busy status checking.
- * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
- * instance's registers are accessed
- * (0 for 1st instance, 10 for 2nd instance).
- *
- *VCE_STATUS
- *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
- *|----+----+-----------+----+----+----+----------+---------+----|
- *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
- *
- * VCE team suggest use bit 3--bit 6 for busy status check
- */
- mutex_lock(&adev->grbm_idx_mutex);
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
- if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
- }
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
- if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
- }
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- if (srbm_soft_reset) {
- adev->vce.srbm_soft_reset = srbm_soft_reset;
- return true;
- } else {
- adev->vce.srbm_soft_reset = 0;
- return false;
- }
-}
-
-static int vce_v4_0_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
- srbm_soft_reset = adev->vce.srbm_soft_reset;
-
- if (srbm_soft_reset) {
- u32 tmp;
-
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
-
- return 0;
-}
-
-static int vce_v4_0_pre_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return vce_v4_0_suspend(adev);
-}
-
-
-static int vce_v4_0_post_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return vce_v4_0_resume(adev);
-}
-
-static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
-{
- u32 tmp, data;
-
- tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
- if (override)
- data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
- else
- data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
-
- if (tmp != data)
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
-}
-
-static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
- bool gated)
-{
- u32 data;
-
- /* Set Override to disable Clock Gating */
- vce_v4_0_override_vce_clock_gating(adev, true);
-
- /* This function enables MGCG which is controlled by firmware.
- With the clocks in the gated state the core is still
- accessible but the firmware will throttle the clocks on the
- fly as necessary.
- */
- if (gated) {
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
- data |= 0x1ff;
- data &= ~0xef0000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
- data |= 0x3ff000;
- data &= ~0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
- data |= 0x2;
- data &= ~0x00010000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
- data |= 0x37f;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
- data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
- 0x8;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
- } else {
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
- data &= ~0x80010;
- data |= 0xe70008;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
- data |= 0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
- data |= 0x10000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
- data &= ~0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
- data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
- 0x8);
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
- }
- vce_v4_0_override_vce_clock_gating(adev, false);
-}
-
-static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
- else
- tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-static int vce_v4_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE);
- int i;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_TONGA) ||
- (adev->asic_type == CHIP_FIJI))
- vce_v4_0_set_bypass_mode(adev, enable);
-
- if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
- return 0;
-
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < 2; i++) {
- /* Program VCE Instance 0 or 1 if not harvested */
- if (adev->vce.harvest_config & (1 << i))
- continue;
-
- WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
-
- if (enable) {
- /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
- uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
- data &= ~(0xf | 0xff0);
- data |= ((0x0 << 0) | (0x04 << 4));
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
-
- /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
- data &= ~(0xf | 0xff0);
- data |= ((0x0 << 0) | (0x04 << 4));
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
- }
-
- vce_v4_0_set_vce_sw_clock_gating(adev, enable);
- }
-
- WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-#endif
-
-static int vce_v4_0_set_powergating_state(void *handle,
+static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -937,7 +706,7 @@ static int vce_v4_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v4_0_stop(adev);
@@ -986,7 +755,7 @@ static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1045,19 +814,12 @@ static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
.name = "vce_v4_0",
.early_init = vce_v4_0_early_init,
- .late_init = NULL,
.sw_init = vce_v4_0_sw_init,
.sw_fini = vce_v4_0_sw_fini,
.hw_init = vce_v4_0_hw_init,
.hw_fini = vce_v4_0_hw_fini,
.suspend = vce_v4_0_suspend,
.resume = vce_v4_0_resume,
- .is_idle = NULL /* vce_v4_0_is_idle */,
- .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
- .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
- .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
- .soft_reset = NULL /* vce_v4_0_soft_reset */,
- .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
.set_clockgating_state = vce_v4_0_set_clockgating_state,
.set_powergating_state = vce_v4_0_set_powergating_state,
};
@@ -1068,11 +830,10 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vce_v4_0_ring_get_rptr,
.get_wptr = vce_v4_0_ring_get_wptr,
.set_wptr = vce_v4_0_ring_set_wptr,
- .parse_cs = amdgpu_vce_ring_parse_cs_vm,
+ .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c
new file mode 100644
index 000000000000..2b9ddb3d2fe1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "vcn_sw_ring.h"
+
+void vcn_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, uint32_t flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP);
+}
+
+void vcn_dec_sw_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
+}
+
+void vcn_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags)
+{
+ uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+void vcn_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+void vcn_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ uint32_t vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void vcn_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h
new file mode 100644
index 000000000000..7e775725f120
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_SW_RING_H__
+#define __VCN_SW_RING_H__
+
+#define VCN_SW_RING_EMIT_FRAME_SIZE \
+ (4 + /* vcn_dec_sw_ring_emit_vm_flush */ \
+ 5 + 5 + /* vcn_dec_sw_ring_emit_fence x2 vm fence */ \
+ 1) /* vcn_dec_sw_ring_insert_end */
+
+void vcn_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, uint32_t flags);
+void vcn_dec_sw_ring_insert_end(struct amdgpu_ring *ring);
+void vcn_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+void vcn_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+void vcn_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ uint32_t vmid, uint64_t pd_addr);
+void vcn_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+
+#endif /* __VCN_SW_RING_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 6117931fa8d7..a316797875a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include "amdgpu.h"
+#include "amdgpu_cs.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "soc15.h"
@@ -44,52 +45,92 @@
#define mmUVD_REG_XX_MASK_1_0 0x05ac
#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1
-static int vcn_v1_0_stop(struct amdgpu_device *adev);
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
+static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
-static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v1_0_idle_work_handler(struct work_struct *work);
static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
/**
- * vcn_v1_0_early_init - set function pointers
+ * vcn_v1_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
+ * Load microcode from filesystem
*/
-static int vcn_v1_0_early_init(void *handle)
+static int vcn_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->vcn.num_vcn_inst = 1;
- adev->vcn.num_enc_rings = 2;
+ adev->vcn.inst[0].num_enc_rings = 2;
+ adev->vcn.inst[0].set_pg_state = vcn_v1_0_set_pg_state;
vcn_v1_0_set_dec_ring_funcs(adev);
vcn_v1_0_set_enc_ring_funcs(adev);
vcn_v1_0_set_irq_funcs(adev);
- jpeg_v1_0_early_init(handle);
+ jpeg_v1_0_early_init(ip_block);
- return 0;
+ return amdgpu_vcn_early_init(adev, 0);
}
/**
* vcn_v1_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v1_0_sw_init(void *handle)
+static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+ uint32_t *ptr;
+ struct amdgpu_device *adev = ip_block->adev;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -98,124 +139,130 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.inst->irq);
if (r)
return r;
}
- r = amdgpu_vcn_sw_init(adev);
+ r = amdgpu_vcn_sw_init(adev, 0);
if (r)
return r;
/* Override the work func */
- adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
+ adev->vcn.inst[0].idle_work.work.func = vcn_v1_0_idle_work_handler;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
- adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
- DRM_INFO("PSP loading VCN firmware\n");
- }
+ amdgpu_vcn_setup_ucode(adev, 0);
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(adev, 0);
if (r)
return r;
ring = &adev->vcn.inst->ring_dec;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 =
+ adev->vcn.inst[0].internal.scratch9 = adev->vcn.inst->external.scratch9 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = adev->vcn.inst->external.data0 =
+ adev->vcn.inst[0].internal.data0 = adev->vcn.inst->external.data0 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = adev->vcn.inst->external.data1 =
+ adev->vcn.inst[0].internal.data1 = adev->vcn.inst->external.data1 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = adev->vcn.inst->external.cmd =
+ adev->vcn.inst[0].internal.cmd = adev->vcn.inst->external.cmd =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = adev->vcn.inst->external.nop =
+ adev->vcn.inst[0].internal.nop = adev->vcn.inst->external.nop =
SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
+
ring = &adev->vcn.inst->ring_enc[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_enc%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ hw_prio, NULL);
if (r)
return r;
}
- adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+ adev->vcn.inst[0].pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
- r = jpeg_v1_0_sw_init(handle);
+ if (amdgpu_vcnfw_log) {
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ amdgpu_vcn_fwlog_init(adev->vcn.inst);
+ }
+
+ r = jpeg_v1_0_sw_init(ip_block);
+
+ /* Allocate memory for VCN IP Dump buffer */
+ ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for VCN IP Dump\n");
+ adev->vcn.ip_dump = NULL;
+ } else {
+ adev->vcn.ip_dump = ptr;
+ }
return r;
}
/**
* vcn_v1_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v1_0_sw_fini(void *handle)
+static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
if (r)
return r;
- jpeg_v1_0_sw_fini(handle);
+ jpeg_v1_0_sw_fini(ip_block);
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sw_fini(adev, 0);
- return r;
+ kfree(adev->vcn.ip_dump);
+
+ return 0;
}
/**
* vcn_v1_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v1_0_hw_init(void *handle)
+static int vcn_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
r = amdgpu_ring_test_helper(ring);
- if (r)
- goto done;
-
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
return r;
}
@@ -223,17 +270,22 @@ done:
/**
* vcn_v1_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v1_0_hw_fini(void *handle)
+static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- RREG32_SOC15(VCN, 0, mmUVD_STATUS))
- vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+ }
return 0;
}
@@ -241,20 +293,27 @@ static int vcn_v1_0_hw_fini(void *handle)
/**
* vcn_v1_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v1_0_suspend(void *handle)
+static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool idle_work_unexecuted;
+
+ idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
+ if (idle_work_unexecuted) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, 0);
+ }
- r = vcn_v1_0_hw_fini(adev);
+ r = vcn_v1_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
return r;
}
@@ -262,20 +321,19 @@ static int vcn_v1_0_suspend(void *handle)
/**
* vcn_v1_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v1_0_resume(void *handle)
+static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(ip_block->adev, 0);
if (r)
return r;
- r = vcn_v1_0_hw_init(adev);
+ r = vcn_v1_0_hw_init(ip_block);
return r;
}
@@ -283,13 +341,14 @@ static int vcn_v1_0_resume(void *handle)
/**
* vcn_v1_0_mc_resume_spg_mode - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -354,9 +413,10 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config);
}
-static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -429,12 +489,13 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
/**
* vcn_v1_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data;
/* JPEG disable CGC */
@@ -458,7 +519,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
- data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
@@ -555,12 +616,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
/**
* vcn_v1_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: Pointer to the VCN instance structure
*
* Enable clock gating for VCN block
*/
-static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
/* enable JPEG CGC */
@@ -624,8 +686,10 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel)
+static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t reg_data = 0;
/* disable JPEG CGC */
@@ -678,8 +742,9 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
}
-static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_1_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -723,8 +788,9 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
}
-static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_1_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -765,14 +831,15 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
}
/**
- * vcn_v1_0_start - start VCN block
+ * vcn_v1_0_start_spg_mode - start VCN block
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Setup and start the VCN block
*/
-static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_start_spg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -781,13 +848,13 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_1_0_disable_static_power_gating(adev);
+ vcn_1_0_disable_static_power_gating(vinst);
tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/* disable clock gating */
- vcn_v1_0_disable_clock_gating(adev);
+ vcn_v1_0_disable_clock_gating(vinst);
/* disable interupt */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
@@ -829,7 +896,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- vcn_v1_0_mc_resume_spg_mode(adev);
+ vcn_v1_0_mc_resume_spg_mode(vinst);
WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
@@ -942,11 +1009,17 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
jpeg_v1_0_start(adev, 0);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -954,7 +1027,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_1_0_enable_static_power_gating(adev);
+ vcn_1_0_enable_static_power_gating(vinst);
/* enable dynamic power gating mode */
tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
@@ -963,7 +1036,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
/* enable clock gating */
- vcn_v1_0_clock_gating_dpg_mode(adev, 0);
+ vcn_v1_0_clock_gating_dpg_mode(vinst, 0);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -1012,7 +1085,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0);
- vcn_v1_0_mc_resume_dpg_mode(adev);
+ vcn_v1_0_mc_resume_dpg_mode(vinst);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
@@ -1029,7 +1102,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
- vcn_v1_0_clock_gating_dpg_mode(adev, 1);
+ vcn_v1_0_clock_gating_dpg_mode(vinst, 1);
/* setup mmUVD_LMI_CTRL */
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
@@ -1086,29 +1159,32 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
jpeg_v1_0_start(adev, 1);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_start(struct amdgpu_device *adev)
+static int vcn_v1_0_start(struct amdgpu_vcn_inst *vinst)
{
- int r;
+ struct amdgpu_device *adev = vinst->adev;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- r = vcn_v1_0_start_dpg_mode(adev);
- else
- r = vcn_v1_0_start_spg_mode(adev);
- return r;
+ return (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ?
+ vcn_v1_0_start_dpg_mode(vinst) : vcn_v1_0_start_spg_mode(vinst);
}
/**
- * vcn_v1_0_stop - stop VCN block
+ * vcn_v1_0_stop_spg_mode - stop VCN block
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* stop the VCN block
*/
-static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_spg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
int tmp;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
@@ -1119,10 +1195,10 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
- /* put VCPU into reset */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ /* stall UMC channel */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
@@ -1141,15 +1217,27 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
+ /* put VCPU into reset */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
- vcn_v1_0_enable_clock_gating(adev);
- vcn_1_0_enable_static_power_gating(adev);
+ vcn_v1_0_enable_clock_gating(vinst);
+ vcn_1_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t tmp;
/* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
@@ -1178,24 +1266,32 @@ static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_stop(struct amdgpu_device *adev)
+static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
int r;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- r = vcn_v1_0_stop_dpg_mode(adev);
+ r = vcn_v1_0_stop_dpg_mode(vinst);
else
- r = vcn_v1_0_stop_spg_mode(adev);
+ r = vcn_v1_0_stop_spg_mode(vinst);
return r;
}
-static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
int ret_code;
uint32_t reg_data = 0;
uint32_t reg_data2 = 0;
@@ -1242,7 +1338,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- ring = &adev->vcn.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
@@ -1289,7 +1384,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
/* Restore */
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
@@ -1303,7 +1398,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
- ring = &adev->vcn.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
@@ -1321,16 +1415,16 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
return 0;
}
-static bool vcn_v1_0_is_idle(void *handle)
+static bool vcn_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
-static int vcn_v1_0_wait_for_idle(void *handle)
+static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
@@ -1339,20 +1433,21 @@ static int vcn_v1_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v1_0_set_clockgating_state(void *handle,
+static int vcn_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v1_0_is_idle(handle))
+ if (!vcn_v1_0_is_idle(ip_block))
return -EBUSY;
- vcn_v1_0_enable_clock_gating(adev);
+ vcn_v1_0_enable_clock_gating(vinst);
} else {
/* disable HW gating and enable Sw gating */
- vcn_v1_0_disable_clock_gating(adev);
+ vcn_v1_0_disable_clock_gating(vinst);
}
return 0;
}
@@ -1535,7 +1630,7 @@ static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1680,7 +1775,7 @@ static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1744,8 +1839,8 @@ static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun
}
}
-static int vcn_v1_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
* That's done in the dpm code via the SMC. This
@@ -1755,28 +1850,29 @@ static int vcn_v1_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if(state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v1_0_stop(adev);
+ ret = vcn_v1_0_stop(vinst);
else
- ret = vcn_v1_0_start(adev);
+ ret = vcn_v1_0_start(vinst);
+
+ if (!ret)
+ vinst->cur_state = state;
- if(!ret)
- adev->vcn.cur_state = state;
return ret;
}
static void vcn_v1_0_idle_work_handler(struct work_struct *work)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
unsigned int fences = 0, i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -1787,37 +1883,37 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec))
new_state.jpeg = VCN_DPG_STATE__PAUSE;
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
- adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ adev->vcn.inst->pause_dpg_mode(vcn_inst, &new_state);
}
- fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec);
+ fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec);
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec);
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
}
}
static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
- mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround);
- if (amdgpu_fence_wait_empty(&ring->adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec))
DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n");
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
@@ -1831,7 +1927,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
@@ -1841,7 +1937,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
struct dpg_pause_state new_state;
unsigned int fences = 0, i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
if (fences)
@@ -1849,7 +1945,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec))
new_state.jpeg = VCN_DPG_STATE__PAUSE;
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
@@ -1859,20 +1955,79 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
- adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ adev->vcn.inst->pause_dpg_mode(adev->vcn.inst, &new_state);
}
}
void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
{
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
- mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
+ schedule_delayed_work(&ring->adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
+ mutex_unlock(&ring->adev->vcn.inst[0].vcn1_jpeg1_workaround);
+}
+
+static void vcn_v1_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+ uint32_t inst_off, is_powered;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * reg_count;
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+ if (is_powered) {
+ drm_printf(p, "\nActive Instance:VCN%d\n", i);
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_1_0[j].reg_name,
+ adev->vcn.ip_dump[inst_off + j]);
+ } else {
+ drm_printf(p, "\nInactive Instance:VCN%d\n", i);
+ }
+ }
+}
+
+static void vcn_v1_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ uint32_t inst_off;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ inst_off = i * reg_count;
+ /* mmUVD_POWER_STATUS is always readable and is first element of the array */
+ adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS);
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+ if (is_powered)
+ for (j = 1; j < reg_count; j++)
+ adev->vcn.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_1_0[j], i));
+ }
}
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
.early_init = vcn_v1_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v1_0_sw_init,
.sw_fini = vcn_v1_0_sw_fini,
.hw_init = vcn_v1_0_hw_init,
@@ -1881,23 +2036,91 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.resume = vcn_v1_0_resume,
.is_idle = vcn_v1_0_is_idle,
.wait_for_idle = vcn_v1_0_wait_for_idle,
- .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
- .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
- .soft_reset = NULL /* vcn_v1_0_soft_reset */,
- .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
.set_clockgating_state = vcn_v1_0_set_clockgating_state,
- .set_powergating_state = vcn_v1_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = vcn_v1_0_dump_ip_state,
+ .print_ip_state = vcn_v1_0_print_ip_state,
};
+/*
+ * It is a hardware issue that VCN can't handle a GTT TMZ buffer on
+ * CHIP_RAVEN series ASIC. Move such a GTT TMZ buffer to VRAM domain
+ * before command submission as a workaround.
+ */
+static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ uint64_t addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ int r;
+
+ addr &= AMDGPU_GMC_HOLE_MASK;
+ if (addr & 0x7) {
+ DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr/AMDGPU_GPU_PAGE_SIZE);
+ if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+ return -EINVAL;
+
+ bo = mapping->bo_va->base.bo;
+ if (!(bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED))
+ return 0;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed to validate the VCN message BO (%d)!\n", r);
+ return r;
+ }
+
+ return r;
+}
+
+static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ uint32_t msg_lo = 0, msg_hi = 0;
+ int i, r;
+
+ if (!(ib->flags & AMDGPU_IB_FLAGS_SECURE))
+ return 0;
+
+ for (i = 0; i < ib->length_dw; i += 2) {
+ uint32_t reg = amdgpu_ib_get_value(ib, i);
+ uint32_t val = amdgpu_ib_get_value(ib, i + 1);
+
+ if (reg == PACKET0(p->adev->vcn.inst[0].internal.data0, 0)) {
+ msg_lo = val;
+ } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.data1, 0)) {
+ msg_hi = val;
+ } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.cmd, 0)) {
+ r = vcn_v1_0_validate_bo(p, job,
+ ((u64)msg_hi) << 32 | msg_lo);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
+ .secure_submission_supported = true,
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
.set_wptr = vcn_v1_0_dec_ring_set_wptr,
+ .patch_cs_in_place = vcn_v1_0_ring_patch_cs_in_place,
.emit_frame_size =
6 + 6 + /* hdp invalidate / flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
@@ -1928,7 +2151,6 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.nop = VCN_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
.set_wptr = vcn_v1_0_enc_ring_set_wptr,
@@ -1957,17 +2179,14 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
- DRM_INFO("VCN decode is enabled in VM mode\n");
}
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
-
- DRM_INFO("VCN encode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
@@ -1977,12 +2196,11 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 2;
adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;
}
-const struct amdgpu_ip_block_version vcn_v1_0_ip_block =
-{
+const struct amdgpu_ip_block_version vcn_v1_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCN,
.major = 1,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index d63198c945bf..8897dcc9c1a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -22,6 +22,7 @@
*/
#include <linux/firmware.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_vcn.h"
@@ -36,6 +37,10 @@
#include "vcn/vcn_2_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
+
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503
#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x504
@@ -49,51 +54,90 @@
#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static int vcn_v2_0_start_sriov(struct amdgpu_device *adev);
+static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst);
+
/**
- * vcn_v2_0_early_init - set function pointers
+ * vcn_v2_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
+ * Load microcode from filesystem
*/
-static int vcn_v2_0_early_init(void *handle)
+static int vcn_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->vcn.num_vcn_inst = 1;
if (amdgpu_sriov_vf(adev))
- adev->vcn.num_enc_rings = 1;
+ adev->vcn.inst[0].num_enc_rings = 1;
else
- adev->vcn.num_enc_rings = 2;
+ adev->vcn.inst[0].num_enc_rings = 2;
+ adev->vcn.inst->set_pg_state = vcn_v2_0_set_pg_state;
vcn_v2_0_set_dec_ring_funcs(adev);
vcn_v2_0_set_enc_ring_funcs(adev);
vcn_v2_0_set_irq_funcs(adev);
- return 0;
+ return amdgpu_vcn_early_init(adev, 0);
}
/**
* vcn_v2_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v2_0_sw_init(void *handle)
+static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_fw_shared *fw_shared;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -103,7 +147,7 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.inst->irq);
@@ -111,21 +155,13 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- r = amdgpu_vcn_sw_init(adev);
+ r = amdgpu_vcn_sw_init(adev, 0);
if (r)
return r;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
- adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
- DRM_INFO("PSP loading VCN firmware\n");
- }
+ amdgpu_vcn_setup_ucode(adev, 0);
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(adev, 0);
if (r)
return r;
@@ -133,92 +169,119 @@ static int vcn_v2_0_sw_init(void *handle)
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
+
ring = &adev->vcn.inst->ring_enc[i];
ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
if (!amdgpu_sriov_vf(adev))
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
else
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + i;
sprintf(ring->name, "vcn_enc%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT);
+ hw_prio, NULL);
if (r)
return r;
}
- adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+ adev->vcn.inst[0].pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+ adev->vcn.inst[0].reset = vcn_v2_0_reset;
+
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
- fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(adev->vcn.inst);
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_0, ARRAY_SIZE(vcn_reg_list_2_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
* vcn_v2_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v2_0_sw_fini(void *handle)
+static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ int r, idx;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
- fw_shared->present_flag_0 = 0;
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ fw_shared->present_flag_0 = 0;
+ drm_dev_exit(idx);
+ }
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
if (r)
return r;
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- return r;
+ amdgpu_vcn_sw_fini(adev, 0);
+
+ return 0;
}
/**
* vcn_v2_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v2_0_hw_init(void *handle)
+static int vcn_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
@@ -230,42 +293,40 @@ static int vcn_v2_0_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
//Disable vcn decode for sriov
if (amdgpu_sriov_vf(adev))
ring->sched.ready = false;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
- return r;
+ return 0;
}
/**
* vcn_v2_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v2_0_hw_fini(void *handle)
+static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
- vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
return 0;
}
@@ -273,20 +334,19 @@ static int vcn_v2_0_hw_fini(void *handle)
/**
* vcn_v2_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v2_0_suspend(void *handle)
+static int vcn_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = vcn_v2_0_hw_fini(adev);
+ r = vcn_v2_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(ip_block->adev, 0);
return r;
}
@@ -294,20 +354,19 @@ static int vcn_v2_0_suspend(void *handle)
/**
* vcn_v2_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v2_0_resume(void *handle)
+static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(ip_block->adev, 0);
if (r)
return r;
- r = vcn_v2_0_hw_init(adev);
+ r = vcn_v2_0_hw_init(ip_block);
return r;
}
@@ -315,13 +374,14 @@ static int vcn_v2_0_resume(void *handle)
/**
* vcn_v2_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: Pointer to the VCN instance structure
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
+static void vcn_v2_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
if (amdgpu_sriov_vf(adev))
@@ -365,9 +425,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
/* non-cache window */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
+ lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
+ upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0,
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
@@ -375,9 +435,11 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
-static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
+static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -456,10 +518,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* non-cache window */
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
- lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
+ lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
- upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
+ upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
@@ -474,12 +536,13 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/**
* vcn_v2_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data;
if (amdgpu_sriov_vf(adev))
@@ -583,9 +646,10 @@ static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
+static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
uint8_t sram_sel, uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -634,12 +698,13 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v2_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -692,8 +757,9 @@ static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -741,8 +807,9 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
}
-static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -783,13 +850,15 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
}
}
-static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
+static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
+ int ret;
- vcn_v2_0_enable_static_power_gating(adev);
+ vcn_v2_0_enable_static_power_gating(vinst);
/* enable dynamic power gating mode */
tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
@@ -801,7 +870,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
+ vcn_v2_0_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -850,7 +919,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
+ vcn_v2_0_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -871,10 +940,13 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
UVD, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
- if (indirect)
- psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
- (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr -
- (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, 0, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
/* force RBC into idle state */
rb_bufsz = order_base_2(ring->ring_size);
@@ -917,31 +989,38 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
/* Unstall DPG */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_start(struct amdgpu_device *adev)
+static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
int i, j, r;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
+ return vcn_v2_0_start_dpg_mode(vinst, adev->vcn.inst->indirect_sram);
- vcn_v2_0_disable_static_power_gating(adev);
+ vcn_v2_0_disable_static_power_gating(vinst);
/* set uvd status busy */
tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/*SW clock gating */
- vcn_v2_0_disable_clock_gating(adev);
+ vcn_v2_0_disable_clock_gating(vinst);
/* enable VCPU clock */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
@@ -985,7 +1064,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- vcn_v2_0_mc_resume(adev);
+ vcn_v2_0_mc_resume(vinst);
/* release VCPU reset to boot */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
@@ -1090,13 +1169,21 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v2_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
+ vcn_v2_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1118,16 +1205,22 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_stop(struct amdgpu_device *adev)
+static int vcn_v2_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t tmp;
int r;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_0_stop_dpg_mode(adev);
+ r = vcn_v2_0_stop_dpg_mode(vinst);
if (r)
return r;
goto power_off;
@@ -1179,19 +1272,26 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
/* clear status */
WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
- vcn_v2_0_enable_clock_gating(adev);
- vcn_v2_0_enable_static_power_gating(adev);
+ vcn_v2_0_enable_clock_gating(vinst);
+ vcn_v2_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS);
power_off:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
return 0;
}
-static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code;
@@ -1208,7 +1308,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
@@ -1266,16 +1366,26 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
return 0;
}
-static bool vcn_v2_0_is_idle(void *handle)
+static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst)
+{
+ int r;
+
+ r = vcn_v2_0_stop(vinst);
+ if (r)
+ return r;
+ return vcn_v2_0_start(vinst);
+}
+
+static bool vcn_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
-static int vcn_v2_0_wait_for_idle(void *handle)
+static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
@@ -1284,10 +1394,10 @@ static int vcn_v2_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v2_0_set_clockgating_state(void *handle,
+static int vcn_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -1295,12 +1405,12 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v2_0_is_idle(handle))
+ if (!vcn_v2_0_is_idle(ip_block))
return -EBUSY;
- vcn_v2_0_enable_clock_gating(adev);
+ vcn_v2_0_enable_clock_gating(&adev->vcn.inst[0]);
} else {
/* disable HW gating and enable Sw gating */
- vcn_v2_0_disable_clock_gating(adev);
+ vcn_v2_0_disable_clock_gating(&adev->vcn.inst[0]);
}
return 0;
}
@@ -1331,7 +1441,7 @@ static uint64_t vcn_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
}
@@ -1352,7 +1462,7 @@ static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
lower_32_bits(ring->wptr) | 0x80000000);
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
@@ -1370,9 +1480,9 @@ void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
}
@@ -1387,7 +1497,7 @@ void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[0].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
}
@@ -1407,7 +1517,7 @@ void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
WARN_ON(ring->wptr % 2 || count % 2);
for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.nop, 0));
amdgpu_ring_write(ring, 0);
}
}
@@ -1428,25 +1538,25 @@ void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
struct amdgpu_device *adev = ring->adev;
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.context_id, 0));
amdgpu_ring_write(ring, seq);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, addr & 0xffffffff);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
}
@@ -1469,14 +1579,14 @@ void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_vmid, 0));
amdgpu_ring_write(ring, vmid);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_low, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_high, 0));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_size, 0));
amdgpu_ring_write(ring, ib->length_dw);
}
@@ -1485,16 +1595,16 @@ void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
{
struct amdgpu_device *adev = ring->adev;